-------------------------------------------------------------------------------
Handling escaped characters in URIs

HTTP (the transfer protocol of WWW) escapes difficult characters like this:
  '+' is a space, '%' followed by the character code in hex

Quick Decode...

Decode Query String...

  perl -pe 'tr/+/ /; s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;'

  # Example....
  # Input:  %24argon2i%24v%3D19%24m%3D4096%2Ct%3D3%2Cp%3D1%24%2FFzf
  # Output: $argon2i$v=19$m=4096,t=3,p=1$/Fzf

# --- Subroutines ---

# clean_arg( $arg )
#   Decode one CGI-encoded string: every '+' becomes a space and every
#   '%XX' (two hex digits) becomes the character with that code.
#   Returns the decoded copy; the caller's variable is not modified.
#   The commented-out lines are optional extra clean-ups to enable as needed.
sub clean_arg {
  my ($arg) = @_;
  $arg =~ tr/+/ /;     # CGI string decoding ('+' first, so a '%2B' stays '+')
  $arg =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;
  # $arg =~ s/\r\n?/\n/g;     # clean up end of lines
  # $arg =~ s/^\s+//;         # remove extra spaces at start
  # $arg =~ s/\s+$//;         # remove extra spaces at end
  # $arg =~ s/\s+/ /g;        # remove extra spaces in middle
  # $arg =~ tr/A-Z/a-z/;      # make everything lowercase
  # $arg =~ s/<.*?>//sg;      # Remove all HTML tags (across multiple lines)
  # OR remove just the BAD embedded scripting HTML tags
  # We should actually also remove text surrounded by tags too
  # $arg =~ s/<\/?(SCRIPT|OBJECT|APPLET|EMBED).*?>//sig;
  return( $arg );
}

# do_CGI_string( $str )
#   Split up a CGI variable string ("name=value&name=value...") on '&' and
#   store each decoded name/value pair in the global hash %FORM.
#   A component that contains no '=' is stored, decoded, under the key
#   'search'.  Only the first '=' of a component separates name from value.
sub do_CGI_string {
  my ($str) = @_;
  foreach my $pair ( split(/&/, $str) ) {
    # An empty component (as in "a=1&&b=2") carries no data; skipping it
    # stops it from overwriting $FORM{'search'} with an empty string.
    next if $pair eq '';
    if ( $pair !~ /=/ ) {
      $FORM{'search'} = clean_arg( $pair );
      next;
    }
    my ($name, $value) = split(/=/, $pair, 2);
    $FORM{ clean_arg($name) } = clean_arg($value);
    # print clean_arg($name), " = ", clean_arg($value), "<br>\n";
  }
  return;
}

# --- Decode Arguments ---
if ( defined $ENV{"REQUEST_METHOD"} && $ENV{"REQUEST_METHOD"} eq "POST" ) {
  # POST method: the form data is read from STDIN, CONTENT_LENGTH bytes of it.
  # A missing CONTENT_LENGTH is treated as 0 so read() gets a defined length.
  $buffer = '';
  read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'} || 0);
  do_CGI_string($buffer);
}
elsif ( defined $ENV{"QUERY_STRING"} ) {
  # a direct url link (normal method of usage)
  do_CGI_string($ENV{"QUERY_STRING"});
}
else {
  #do_CGI_string($ARGV[0]);
  print "ERROR: You are NOT calling via a CGI interface! -- ABORTING\n";
  exit 0;
}

-------------------------------------------------------------------------------
Output plain text in HTML format

# htmlize_text( $text )
#   Returns a copy of $text converted for display inside HTML:
#   tabs are expanded to 8-column tab stops, '&', '<' and '>' become
#   entities, spaces become '&nbsp;', each end of line gets a '<br>' and
#   the remaining listed control / 8-bit characters are shown as '&' followed
#   by their 3-digit octal code.
sub htmlize_text {
  my ($text) = @_;

  # Expand tabs (if text is NOT in a <pre>..</pre>)
  # $1 is the tab-free text from the start of the current LINE up to the
  # run of tabs in $2, so the tab stop is computed per line and not from the
  # start of the whole string.  Each pass expands the first run of tabs on
  # every line; the loop repeats until no tab is left.
  1 while $text =~ s/^([^\t\n]*)(\t+)/$1 . ' ' x ( length($2)*8 - length($1)%8 )/mge;

  $text =~ s/\&/\&amp;/g;     # specific characters ('&' must be done first)
  $text =~ s/\</\&lt;/g;
  $text =~ s/\>/\&gt;/g;

  # do the following if text is NOT in a <pre>..</pre>
  $text =~ s/ /\&nbsp;/g;      # replace all spaces (including tabs)
  $text =~ s/\r?\n/<br>\n/g;   # replace end of lines

  # Escape any other binary characters
  # (the capture group is required: the replacement formats ord($1))
  $text =~ s/([\x00-\x08\x7F-\xFF\e])/sprintf("&%03o",ord($1))/eg;
  return $text;
}

For reverse see..  info/www/client.txt

-------------------------------------------------------------------------------
URL-encode variables and arguments for CGI GET/POST

# url_encode( @strings )
#   Returns encoded copies of the arguments: every character other than
#   ASCII letters, digits and  - _ . , ! ~ ' ( )  and space is replaced by
#   '%xx' (lowercase hex), then each space is replaced by '+'.
#   In list context all encoded strings are returned, in scalar context only
#   the first one.  The caller's variables are not modified.
#   NOTE: presumably the strings hold bytes (code points below 256); a
#   larger code point would produce more than two hex digits.
sub url_encode {
  my @a = @_;     # make a copy of the arguments
  for my $str (@a) {
    # Explicit ASCII class rather than \w, so non-ASCII "word" characters
    # are always encoded, whatever the string's internal encoding.
    $str =~ s/([^-A-Za-z0-9_.,!~'() ])/sprintf "%%%02x", ord $1/eg;
    $str =~ tr/ /+/;
  }
  return wantarray ? @a : $a[0];
}

# post_arg_encode( name => value, ... )
#   Returns the arguments as one "name=value&name=value..." string, with
#   every name and value passed through url_encode().
#   With an odd number of arguments the last name gets an empty value.
sub post_arg_encode {
  my @encoded = url_encode(@_);  # encode all post arguments before joining
  my @parts;
  while ( @encoded ) {
    my ($name, $value) = splice(@encoded, 0, 2);
    $value = '' unless defined $value;
    push @parts, join('=', $name, $value);
  }
  return join('&', @parts);
}

# Example usage
my @post_args = (
  filename => '/etc/options',
  chars    => 'ABC-123',
  punct    => '%^&*+',
  flag     => '',
  spaces   => 'This has spaces',
);
# prints: filename=%2fetc%2foptions&chars=ABC-123&punct=%25%5e%26%2a%2b&flag=&spaces=This+has+spaces
print post_arg_encode(@post_args), "\n\n";
# the original arguments are left untouched by the encoding
print join("\n", @post_args), "\n\n";

-------------------------------------------------------------------------------
Decode URL

# decode_url( $url )
#   Returns a copy of $url with every '%XX' (two hex digits) replaced by the
#   character with that code.  Unlike clean_arg() a '+' is left alone.
sub decode_url {
  my ($url) = @_;
  $url =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;
  return $url;
}

-------------------------------------------------------------------------------
Mime Base64 decode

use MIME::Base64;
$decoded = decode_base64( $encoded );

===============================================================================
Perl Modules for WWW use...

LWP::Simple
  Ultra simple file downloader

# Download URL to file "file.html"
use LWP::Simple;
$rc = mirror( $URL, "file.html" );

LWP::UserAgent
  Perl Light-weight WWW Library
  WARNING: the page is only downloaded into memory!!

$ua = LWP::UserAgent->new;
$return = $ua->get( $URL );
die unless $return->is_success;

OR

$ua = LWP::UserAgent->new;
$request = HTTP::Request->new( GET => $URL );
$return = $ua->request($request, "file.html" );
die unless $return->is_success;

HTTP::Cookies
  Handle cookies from a website

use LWP::UserAgent;
use HTTP::Cookies;
$ua = LWP::UserAgent->new;
$cookie_jar = HTTP::Cookies::Netscape->new(
  File     => "$ENV{HOME}/.netscape/cookies",
  AutoSave => 1 );
$request = HTTP::Request->new( GET => $URL );
# add the stored cookies to the outgoing request by hand
$cookie_jar->add_cookie_header($request);
$return = $ua->request($request, $file);

SKUD/WWW-Automate-0.20
  Get your Perl script following links like a human
  Eg:
    $agent->get($url);
    $agent->follow("for staff");
    $agent->submit;
    $agent->back;

Curl::easy
  Use the curl library to download files (NOT an OO interface)
  Also allows you to control the actual download (e.g. save to file or
  memory as it is being received), and do a progress monitor/meter.

$curl = Curl::easy::init();
Curl::easy::setopt($curl, CURLOPT_TIMEOUT, 120);
Curl::easy::setopt($curl, CURLOPT_URL, $url);
# stop here if the output file cannot be created, rather than downloading
# into an unopened handle
open( FILE, '>', $file ) or die "Unable to write \"$file\": $!\n";
Curl::easy::setopt($curl, CURLOPT_FILE, *FILE);
$return = Curl::easy::perform($curl);
close( FILE );
Curl::easy::cleanup($curl);

For more detail see my "http_files" script.

MIME::Base64
  Encode and decode base64 encoded strings.
  This includes www username:password headers, as well as mailed files.

-------------------------------------------------------------------------------