#!/usr/bin/perl -w
# $Id: edgeproxy,v 1.18 2006-03-13 14:05:49 goodell Exp $
$license = <<EOF
Copyright (c) 2005 Geoffrey Goodell.

This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA  02111-1307, USA.

EOF
;

# derived from The Perl Cookbook (O'Reilly), Recipe 17.19.
# http://www.unix.org.ua/orelly/perl/cookbook/ch17_19.htm
# fwdport -- act as proxy forwarder for dedicated services

use strict;                 # require declarations
use Getopt::Long;           # for option processing
use Net::hostent;           # by-name interface for host info
use IO::Socket;             # for creating server and client sockets
use POSIX ":sys_wait_h";    # for reaping our dead children

use vars qw($license);

my (
    %Children,              # hash of outstanding child processes
    $REMOTE,                # whom we connect to on the outside
    $LOCAL,                 # where we listen to on the inside
    $SERVICE,               # our service name or port number
    $proxy_server,          # the socket we accept() from
    $ME,                    # basename of this program
);

my $DEBUG = 0;              # debug level

($ME = $0) =~ s,.*/,,;      # retain just basename of script name

check_args();               # processing switches
start_proxy();              # launch our own server
service_clients();          # wait for incoming
die "NOT REACHED";          # you can't get here from there

# process command line switches using the extended
# version of the getopts library.
sub check_args {
    GetOptions(
        "remote=s"    => \$REMOTE,
        "local=s"     => \$LOCAL,
        "service=s"   => \$SERVICE,
        "debug=s"     => \$DEBUG,
    ) or die <<EOUSAGE;
    usage: $0 [ --remote host ] [ --local interface ] [ --service service ] [ --debug level ]
EOUSAGE
    die "Need remote"                   unless $REMOTE;
    die "Need local or service"         unless $LOCAL || $SERVICE;
}

sub license() {
    print $license; exit 0;
}

sub log_info($$) {
    my ($debuglevel, $line) = (shift, shift);
    chomp $line;
    print STDERR "$line\n" if $debuglevel < $DEBUG;
}

sub append_exit($$) {
    my ($host, $router) = (shift, shift);
    if ($host !~ /\.[A-Za-z0-9-]+\.exit(:[0-9+])?$/
            and $host !~ /\.[cq]\.[A-Za-z0-9-.]+\.blossom(:[0-9+])?$/) {
        if($host =~ /^(\S+)(:[0-9]+)$/) {
            $host = $1 . ".$router" . $2;
        } else {
            $host .= ".$router";
        }
    }
    return $host;
}

# begin our server
sub start_proxy {
    my @proxy_server_config = (
      Proto     => 'tcp',
      Reuse     => 1,
      Listen    => SOMAXCONN,
    );
    push @proxy_server_config, LocalPort => $SERVICE if $SERVICE;
    push @proxy_server_config, LocalAddr => $LOCAL   if $LOCAL;
    $proxy_server = IO::Socket::INET->new(@proxy_server_config)
                    or die "can't create proxy server: $@";
    log_info(1, "[proxy server on " . ($LOCAL || $SERVICE) . " initialized]");
}

sub service_clients {
    my (
        $local_client,      # someone internal wanting out
        $lc_info,           # local client's name/port information
        $remote_server,     # the socket for escaping out
        @rs_config,         # temp array for remote socket options
        $rs_info,           # remote server's name/port information
        $kidpid,            # spawned child for each connection
    );

    $SIG{CHLD} = \&REAPER;  # harvest the moribund

    accepting();

    # an accepted connection here means someone inside wants out
    while (1) {
        while ($local_client = $proxy_server->accept()) {
            $lc_info = peerinfo($local_client);
            set_state("servicing local $lc_info");
            log_info(1, "[received connect from $lc_info]");

            @rs_config = (
                Proto     => 'tcp',
                PeerAddr  => $REMOTE,
            );
            push(@rs_config, PeerPort => $SERVICE) if $SERVICE;

            log_info(1, "[connecting to $REMOTE]");
            set_state("connecting to $REMOTE"); # see below
            $remote_server = IO::Socket::INET->new(@rs_config);

            if(not defined $remote_server) {
                next;
            }

            $rs_info = peerinfo($remote_server);
            set_state("connected to $rs_info");

            $kidpid = fork();
            die "Cannot fork" unless defined $kidpid;
            if ($kidpid) {
                $Children{$kidpid} = time();    # remember his start time
                close $remote_server;           # no use to master
                close $local_client;            # likewise
                next;                           # go get another client
            }

            # at this point, we are the forked child process dedicated
            # to the incoming client.  but we want a twin to make i/o
            # easier.

            close $proxy_server;                # no use to slave

            pipe READER, WRITER;

            $kidpid = fork();
            die "Cannot fork" unless defined $kidpid;

            # now each twin sits around and ferries lines of data.
            # see how simple the algorithm is when you can have
            # multiple threads of control?

            # this is the fork's parent, the master's child
            if ($kidpid) {
                close WRITER;

                my $proxypath = "";
                my $reverseproxy = undef;

                my $router = <READER>;
                if($router) {
                    chomp $router;
                } else {
                    $router = "";
                }
                if($router =~ /\+(\S+)$/) {
                    $proxypath = "/proxy/";
                    $reverseproxy = $proxypath . "http://$1";
                    $router =~ s/\+\S+$//;
                    log_info(1, "reverse proxy: $reverseproxy");
                }
                $router = undef if $router eq "";

                set_state("$rs_info --> $lc_info");
                select($local_client); $| = 1;

                # Perform substitution for A, IMG, and LINK tags in HTML documents

                my $html        = undef;
                my $length      = undef;
                my $content     = "";
                my $headers     = "";
                my $type        = "";

                while(<$remote_server>) {
                    log_info(1, "    recv: $_");
                    if(/^Content-Type: (\S+?)(;.*)?\r$/i) {
                        $type = $1;
                        $html = $type if $type =~ /^text\/html$/;
                        $headers .= $_;
                    } elsif(/^Content-Length: (\S+)\r$/i) {
                        $length = $1;
                        $headers .= $_;
                    } elsif(/^Location: (https?:\/\/)([A-Za-z0-9.-]+)(:[0-9]+)?(\/?)(.*?)\r/i) {
                        my ($pre, $host, $port, $post, $rest) = ($1, $2, $3, $4, $5);

                        $port = "" if not $port;

                        my $before = "$pre$host$port$post";

                        $host = append_exit($host, $router) if $router;
                        $pre = "$proxypath$pre" if $reverseproxy;

                        my $after = "$pre$host$port$post";

                        log_info(2, "converting: Location: $before --> Location: $after");

                        $headers .= "Location: $after$rest\r\n";
                    } elsif($reverseproxy and /^Location: \/(.*)\r$/) {
                        my $before = "/$1";
                        my $after = "$reverseproxy$before";

                        log_info(2, "converting: Location: $before --> Location: $after");

                        $headers .= "Location: $after\n";
                    } elsif(/^\r$/) {
                        $headers .= $_;
                        last;
                    } else {
                        $headers .= $_;
                    }
                }

                if($html) {
                    log_info(0, "    data: $type (recognized as HTML)");
                    my $next = "";

                    while(<$remote_server>) {
                        my $line = $_;

                        unless($router or $reverseproxy) {
                            $content .= $_;
                            next;
                        }

                        $line = "$next $line";
                        chomp $line;
                        $next = "";

                        while($line) {
                            if($line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)(https?:\/\/)([A-Za-z0-9.-]+)(:[0-9]+)?(\/|\")(.*)$/i) {
                                my ($tag, $attr, $label, $quote, $pre, $host, $port, $post, $rest)
                                    = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

                                $port = "" if not $port;

                                my $before = "$tag$attr$label=$quote$pre$host$port";

                                # normalize
                                $tag =~ y/A-Z/a-z/;
                                $host =~ y/A-Z/a-z/;

                                if($router) {
                                    $host = append_exit($host, $router);
                                    log_info(1, "<$tag tag: $host>");
                                }

                                my $after = "$tag$attr$label=$quote$proxypath$pre$host$port";

                                log_info(2, "converting: <$before --> <$after");

                                $content .= "<$after";
                                $line = "$post$rest";
                            } elsif($reverseproxy and $line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) {
                                my ($tag, $attr, $label, $quote, $rest) = ($1, $2, $3, $4, $5);

                                my $before = "$tag$attr$label=$quote/";

                                # normalize
                                $tag =~ y/A-Z/a-z/;

                                my $after = "$tag$attr$label=$quote$reverseproxy/";

                                log_info(2, "converting: <$before --> <$after");

                                $content .= "<$after";
                                $line = $rest;
                            } elsif($line =~ /^(<.*?>)(.*)$/) {
                                $content .= $1;
                                $line = $2;
                            } elsif($line =~ /^(<.*)$/) {
                                $next = $1;
                                $line = undef;
                            } elsif($line =~ /^(.*?)(<.*)$/) {
                                $content .= $1;
                                $line = $2;
                            } else {
                                $content .= $line;
                                $line = undef;
                            }
                        }
                        $content .= "\n";
                    }

                    foreach my $line (split /\n/, $headers) {
                        if($line =~ /^Content-Length: (\S+)\r$/i) {
                            $length = length $content;
                            print "Content-Length: $length\n";
                        } else {
                            print "$line\n";
                        }
                    }
                    print $content;

                } elsif($length) {
                    my $data = "";
                    print $headers;

                    log_info(0, "    data: $type [$length]");
                    read($remote_server, $data, $length) or die "    error: $?";
                    print "$data\n";
                } else {
                    print $headers;
                    if($type) {
                        log_info(0, "    data: $type");
                    } else {
                        log_info(0, "    data: unspecified type");
                    }
                    while(<$remote_server>) {
                        print;
                    }
                }

                kill('TERM', $kidpid);          # kill my twin cause we're done
            }
            # this is the fork's child, the master's grandchild
            else {
                close READER;

                set_state("$rs_info <-- $lc_info");
                select($remote_server); $| = 1;

                # Perform HTTP Host field substitution

                my $post = 1;
                while($post) {
                    my $length = 0;
                    my $reverseproxy = undef;

                    $post = undef;

                    while(<$local_client>) {
                        if(/^Content-Length: (\S+)\r$/i) {
                            $length = $1;
                            print;
                            log_info(1, "    send: $_");
                        } elsif(/^Host: (\S+)\r$/i) {
                            my $router = "";
                            my $repl = $reverseproxy || $1;
                            if($repl =~ /\.[A-Za-z0-9-]+\.exit(:[0-9]+)?$/) {
                                $repl =~ s/\.([A-Za-z0-9-]+\.exit)((:[0-9]+)?)$/$2/;
                                $router = $1;
                            }
                            if($repl =~ /\.[cq]\.[A-Za-z0-9-.]+\.blossom(:[0-9]+)?$/) {
                                $repl =~ s/\.([cq]\.[A-Za-z0-9-.]+\.blossom)((:[0-9]+)?)$/$2/;
                                $router = $1;
                            }
                            log_info(0, "transmitting router: [$router]");
                            $router .= "+$reverseproxy" if $reverseproxy;
                            print WRITER "$router\n";
                            close WRITER;

                            $repl = "Host: $repl\r\n";
                            log_info(1, "    send: $repl");
                            print $repl;
                        } elsif(/^(GET|POST) /) {
                            my $line = $_;
                            my $type = $1;
                            if($line =~ /^$type \/http:\/\/([^\/]+)\//) {
                                $reverseproxy = $1;
                                $line =~ s/^$type \//$type /;
                            }
                            print $line;
                            log_info(0, "    send: $line");
                            $post = 1 if $type eq "POST";
                        } elsif(/^\r$/) {
                            print;
                            last if $post;
                        } else {
                            print;
                            log_info(1, "    send: $_");
                        }
                    }

                    if($post) {
                        my $data = "";

                        read($local_client, $data, $length) or die "    error: $?";

                        print "$data";
                        log_info(1, "    post: [$length]");
                        log_info(2, "    data: $data");
                    }
                }

                kill('TERM', getppid());        # kill my twin cause we're done
            }
            exit;                               # whoever's still alive bites it
        }
    }
}

# helper function to produce a nice string in the form HOST:PORT
sub peerinfo {
    my $sock = shift;
    my $hostinfo = undef;
    my ($peeraddr, $peerport) = ("*", "*");

    if($sock->peeraddr) {
        $hostinfo = gethostbyaddr($sock->peeraddr);
        if($hostinfo and $hostinfo->name) {
            $peeraddr = $hostinfo->name;
        } elsif($hostinfo and $sock->peerhost) {
            $peeraddr = $sock->peerhost;
        } elsif($sock->peeraddr and length($sock->peeraddr) == 4) {
            $peeraddr = inet_ntoa($sock->peeraddr);
        }
    }
    $peerport = $sock->peerport if $sock->peerport;
    return sprintf "%s:%s", $peeraddr, $peerport;
}

# reset our $0, which on some systems make "ps" report
# something interesting: the string we set $0 to!
sub set_state { $0 = "$ME [@_]" }

# helper function to call set_state
sub accepting {
    set_state("accepting proxy for " . ($REMOTE || $SERVICE));
}

# somebody just died.  keep harvesting the dead until
# we run out of them.  check how long they ran.
sub REAPER {
    my $child;
    my $start;
    while (($child = waitpid(-1,WNOHANG)) > 0) {
        if ($start = $Children{$child}) {
            my $runtime = time() - $start;
            my $line = sprintf "process $child completed in %dm %ss\n",
                               $runtime / 60,
                               $runtime % 60;
            chomp $line;
            log_info(1, $line);
            delete $Children{$child};
        } else {
            log_info(1, "process $child exited [$?]");
        }
    }

    # If I had to choose between System V and 4.2, I'd resign. --Peter Honeyman
    $SIG{CHLD} = \&REAPER;
};