#!/usr/bin/perl
#
# Phone Proxy: a CGI script that fetches a web page with wget and re-emits a
# stripped-down version of it.  Links, form actions and frame sources are
# rewritten so that they point back at this script; tables are flattened into
# line breaks and "|" separators; scripts, iframes and styles are dropped.
#
# The target page is taken from PATH_INFO (plus QUERY_STRING), e.g.
#   /cgi-bin/proxy/www.example.com/page.html?x=1
# With no PATH_INFO the "proxgo" parameter is turned into a redirect to that
# form, and with neither a small entry form is shown.
#
# NOTE(review): the copy of this file that was reviewed had every HTML tag
# stripped out of its string literals and regexes.  Markup below was restored
# where the surrounding code makes it unambiguous; places where it is a guess
# are marked NOTE(review) and should be checked against a pristine copy.

use strict;
use warnings;

use CGI qw(:standard);
use HTML::PullParser;
use Data::Dumper;

my $script_name  = defined $ENV{SCRIPT_NAME}     ? $ENV{SCRIPT_NAME}     : '';
my $path_info    = defined $ENV{PATH_INFO}       ? $ENV{PATH_INFO}       : '';
my $query_string = defined $ENV{QUERY_STRING}    ? $ENV{QUERY_STRING}    : '';
my $user_agent   = defined $ENV{HTTP_USER_AGENT} ? $ENV{HTTP_USER_AGENT} : '';

my $url;
if ($path_info eq '') {
    $url = param('proxgo');
    $url = '' unless defined $url;

    # Never let a line break from the request reach the Location header.
    $url =~ s/[\r\n].*//s;

    if ($url ne '') {
        $url =~ s|^http://||i;
        $url =~ s|^/||;
        print "Location: $script_name/$url\n\n";
        exit 0;
    }
}
else {
    $url = "$path_info?$query_string";
}

if ($url eq '') {
    # NOTE(review): only the words "Phone Proxy" and "Go to:" survived in the
    # reviewed copy; the surrounding form markup is reconstructed around the
    # "proxgo" parameter that the code above reads.
    my $form_action = escape_html($script_name);
    print header('text/html');
    print <<"EOF";
<html><head><title>Phone Proxy</title></head>
<body>
<form action="$form_action" method="GET">
Go to: <input type="text" name="proxgo">
<input type="submit" value="Go">
</form>
</body></html>
EOF
    exit 0;
}

# Normalise the target into an absolute http:// URL.
$url =~ s/\?$//;                # drop the "?" left by an empty query string
$url =~ s|^/([^/])|$1|;         # drop PATH_INFO's leading slash
# Anchored on purpose: an "http://" buried in the query string must not
# suppress the prefix, otherwise the value handed to wget could start with "-".
$url = "http://$url" unless $url =~ m|^http://|i;
$url =~ s/\"//g;

# $base is "host/dir/" of the fetched page; relative links resolve against it.
my $base = $url;
$base =~ s/[?#].*//s;           # a "/" inside the query is not a path separator
$base =~ s|^http://||i;
$base =~ s|/[^/]*$||;
$base = "$base/" unless $base =~ m|/$|;

my $data = fetch_page($url, $user_agent);
print STDERR "$0: Got " . length($data) . " bytes for $url\n";

print header('text/html');
# NOTE(review): these two prints emit bare newlines in the reviewed copy; any
# markup they once carried could not be recovered, so they are kept as seen.
print "\n";
print "\n";
#print "$0: Got " . length($data) . " bytes for $url\n";

my $p = HTML::PullParser->new(
    doc             => $data,
    start           => 'event,tagname,attr,text',
    end             => 'event,tagname',
    text            => '@{dtext}',
    unbroken_text   => 1,
    ignore_elements => ['script', 'iframe', 'style', 'noframes'],
);

my $output = '';
while (defined(my $token = $p->get_token())) {

    # Text arrives as a plain scalar (because of '@{dtext}'); tags arrive as
    # array references.
    if (!ref $token) {
        my $text = $token;
        $text =~ s/(\S{20})/$1 /g;      # give long words a break opportunity
        # dtext is entity-decoded, so it has to be escaped again on the way out.
        $output .= escape_html($text);
        next;
    }

    my ($event, $tag, $attr, $raw) = @{$token};
    next unless $event;

    if ($event eq 'start') {
        if ($tag eq 'a') {              # handle links
            $output .= render_link($p, $attr);
        }
        elsif ($tag =~ /^tr/) {         # table row
            $output .= "<br>\n";
        }
        elsif ($tag =~ /^t[hd]/) {      # table cell
            $output .= "|\n";
        }
        elsif ($tag eq 'form') {        # forms need their action rewritten
            $output .= rewrite_form_tag($attr, $raw);
        }
        elsif ($tag =~ /(^h.$)|(^b$)|(^i$)|(^br$)|(^p$)|(^input$)|(^textarea$)|(^select$)|(^.l$)|(^li$)|(^title$)|(^option$)/i) {
            $output .= $raw;            # print raw text of tag
        }
        elsif ($tag eq 'html') {        # do simple html tag
            $output .= '<html>';
        }
        elsif ($tag eq 'body') {        # do simple body tag
            $output .= '<body>';
        }
        elsif ($tag =~ /^frame/) {
            if ($tag eq 'frameset') {
                # The reviewed copy appends an empty string here; kept as seen.
                $output .= '';
            }
            else {
                my $frame_url  = escape_html(fixurl($attr->{src}));
                my $frame_name = escape_html($attr->{name});
                $output .= qq|Frame: <a href="$frame_url">$frame_name</a><br>\n|;
            }
        }
    }
    else {
        if ($tag =~ /(^h.$)|(^b$)|(^i$)|(^form$)|(^textarea$)|(^select$)|(^.l$)|(^li$)|(^title$)|(^option$)|(^html$)|(^body$)/) {
            $output .= "</$tag>";
        }
    }
}

# Collapse the runs of line breaks and cell separators produced above.
# NOTE(review): "<br>" and "<p>" in these patterns are reconstructed from the
# line breaks the stripped tags left behind.
$output =~ s/(<br>\s*|<p>\s*)+/<br>/ig;
$output =~ s/(\s*<br>\s*)+/<br>/ig;
$output =~ s/<br>\s*<br>/<br>/ig;
# TODO(review): the reviewed copy has one more substitution here,
#   s|()\s*<br>|$1|ig
# whose capture pattern was lost.  With an empty capture it would delete every
# line break, so it stays disabled until the original pattern is recovered.
$output =~ s/\|(<br>|\s+)\|/\|/ig;
$output =~ s/\n\s*/\n/g;
$output =~ s/\n+/\n/g;
$output =~ s/<br>\n*\|/<br>/gi;
$output =~ s/\s+/ /g;

print $output;

# fetch_page($target, $agent)
# Runs "wget -U $agent -O - $target" and returns everything it writes to
# stdout, or '' if wget cannot be started or prints nothing.  The list form of
# open() bypasses the shell, so neither argument can inject commands.
sub fetch_page {
    my ($target, $agent) = @_;

    my $pid = open(my $wget, '-|', 'wget', '-U', $agent, '-O', '-', $target);
    if (!$pid) {
        print STDERR "$0: cannot run wget: $!\n";
        return '';
    }

    binmode $wget;
    my $body = do { local $/; <$wget> };
    close $wget;

    return defined $body ? $body : '';
}

# render_link($parser, $attr)
# Called right after the start tag of an <a>.  Consumes tokens from $parser up
# to the next tag token named "a" (nested anchors are not supported), keeping
# only the text, and returns the anchor rewritten to go through this script.
# An anchor without an href yields just its text.
sub render_link {
    my ($parser, $attr) = @_;

    my $linktext = '';
    # defined(): a text token of "0" must not end the scan.
    while (defined(my $inner = $parser->get_token())) {
        if (ref $inner) {
            last if $inner->[1] eq 'a';
        }
        else {
            $linktext .= $inner;
        }
    }

    $linktext =~ s/(\S{20})/$1 /g;
    my $label = escape_html($linktext);

    return "$label\n" unless defined $attr->{href};

    my $href = escape_html(fixurl($attr->{href}));
    return qq|<a href="$href">$label</a>\n|;
}

# rewrite_form_tag($attr, $raw)
# Returns the raw <form ...> tag text with a double-quoted action attribute
# pointed back at this script and method=post switched to GET (the page is
# fetched with a plain wget call, which sends no request body).
sub rewrite_form_tag {
    my ($attr, $raw) = @_;

    my $action = escape_html(fixurl($attr->{action}));
    $raw =~ s|action=\".+?\"|action=\"$action\"|;
    $raw =~ s/method\s*=\s*"?post"?/method=GET/i;

    return $raw;
}

# escape_html($text)
# Returns $text with & < > " replaced by entities; undef becomes ''.
sub escape_html {
    my $text = shift;
    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

# fixurl($url)
# Maps a URL found in the fetched page to "$SCRIPT_NAME/host/path" so that
# following it goes through this proxy again.
#   http://host/x   -> host/x
#   //host/x        -> host/x
#   /x              -> <host of the current page>/x
#   anything else   -> <directory of the current page>/<url>
# Only "http:" is recognised as a scheme; other schemes fall into the last
# case.  An undefined $url is treated as the empty string.
sub fixurl {
    my $url = shift;
    $url = '' unless defined $url;

    if ($url !~ /^http:/i) {
        if ($url =~ s|^//||) {
            # protocol-relative: already "host/path"
        }
        elsif ($url =~ m|^/|) {
            (my $host = $base) =~ s|/.*$||s;
            $url = "$host$url";
        }
        else {
            $url = "$base$url";
        }
    }

    $url =~ s|^http://||i;
    return "$script_name/$url";
}