-------------------------------------------------------------------------------
Handling escaped characters in URIs
HTTP (the transfer protocol of WWW) escapes difficult characters like
this: '+' is a space, '%' followed by the character code in hex
Quick Decode...
Decode Query String...
perl -pe 'tr/+/ /; s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;'
# Example....
# Input: %24argon2i%24v%3D19%24m%3D4096%2Ct%3D3%2Cp%3D1%24%2FFzf
# Output: $argon2i$v=19$m=4096,t=3,p=1$/Fzf
# --- Subroutines ---
# clean_arg( $arg )
#
# Decode one CGI (URL-encoded) string: every '+' becomes a space and
# every '%XX' hex escape becomes the character with that code.
# Returns the decoded copy; the caller's variable is not modified.
sub clean_arg {
    my ($arg) = @_;    # lexical copy, rather than 'local' on a package global

    # '+' must be translated BEFORE the hex escapes are expanded, so that
    # an escaped plus sign (%2B) survives as a literal '+'.
    $arg =~ tr/+/ /;                                      # CGI string decoding
    $arg =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;    # %XX -> character

    # Optional extra clean up -- uncomment whichever steps are wanted.
    # $arg =~ s/\r\n?/\n/g;     # clean up end of lines
    # $arg =~ s/^\s+//;         # remove extra spaces at start
    # $arg =~ s/\s+$//;         # remove extra spaces at end
    # $arg =~ s/\s+/ /g;        # remove extra spaces in middle
    # $arg =~ tr/A-Z/a-z/;      # make everything lowercase
    # $arg =~ s/<.*?>//sg;      # remove all HTML tags (across multiple lines)
    # OR remove just the BAD embedded scripting HTML tags.
    # We should actually also remove the text surrounded by the tags too.
    # $arg =~ s/<\/?(SCRIPT|OBJECT|APPLET|EMBED).*?>//sig;

    return $arg;
}
# do_CGI_string( $str )
#
# Split up a CGI variable string ("name=value&name2=value2...") and store
# the decoded pairs in the global %FORM hash.  A field that contains no
# '=' at all is stored, decoded, under the key 'search'.  Names and
# values are decoded with clean_arg().  When a name is repeated the last
# value wins.
sub do_CGI_string {
    my ($str) = @_;

    for my $pair ( split /&/, $str ) {

        # A bare field without '=' is kept under the 'search' key.
        if ( $pair !~ /=/ ) {
            $FORM{'search'} = clean_arg($pair);
            next;
        }

        # Split on the first '=' only, so a '=' inside the value is kept.
        my ( $name, $value ) = split /=/, $pair, 2;
        $FORM{ clean_arg($name) } = clean_arg($value);

        # Debugging aid (kept commented out, on ONE line):
        # print clean_arg($name), " = ", clean_arg($value), "<BR>\n";
    }
    return;
}
# --- Decode Arguments ---
# Decode the CGI arguments from whichever source the request supplied.
if ( defined $ENV{"REQUEST_METHOD"}
    && $ENV{"REQUEST_METHOD"} eq "POST" ) {
    # POST method: the form data is read from STDIN, CONTENT_LENGTH bytes.
    # A missing or non-numeric CONTENT_LENGTH is treated as "no data"
    # instead of being handed to read() as is.
    my $length = $ENV{'CONTENT_LENGTH'};
    $length = 0 unless defined $length && $length =~ /^\d+$/;
    $buffer = '';
    read( STDIN, $buffer, $length ) if $length;
    do_CGI_string($buffer);
}
elsif ( defined $ENV{"QUERY_STRING"} ) {
    # a direct url link (normal method of usage)
    do_CGI_string( $ENV{"QUERY_STRING"} );
}
else {
    # Not run by a web server.  Uncomment the next line to decode the
    # first command line argument instead (handy for testing).
    #do_CGI_string($ARGV[0]);
    print "ERROR: You are NOT calling via a CGI interface! -- ABORTING\n";
    exit 0;
}
-------------------------------------------------------------------------------
Output plain text in HTML format
# htmlize_text( $text )
#
# Convert plain text into HTML that displays the same way when it is
# NOT placed inside a <PRE>..</PRE> block:
#   * tabs are expanded to 8-column tab stops,
#   * '&', '<' and '>' become the entities &amp; &lt; &gt;
#   * every space becomes &nbsp; and every end of line gets a <BR>,
#   * remaining control and 8-bit characters (\x00-\x08, ESC, \x7F-\xFF)
#     become decimal numeric character references (&#NNN;).
# Returns the converted copy of the text.
sub htmlize_text {
    my ($text) = @_;

    # Expand tabs (needed if the text is NOT in a <PRE>..</PRE>).
    # The column is measured from the start of the current LINE, so tabs
    # on the second and later lines of the text also land on tab stops.
    1 while $text =~ s{^([^\t\n]*)(\t+)}
                      { $1 . ' ' x ( length($2) * 8 - length($1) % 8 ) }mge;

    # Escape the HTML specific characters.  '&' must be done first, or the
    # entities generated afterwards would themselves be escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # Do the following if the text is NOT in a <PRE>..</PRE>
    $text =~ s/ /&nbsp;/g;        # replace all spaces (including tabs)
    $text =~ s/\r?\n/<BR>\n/g;    # replace end of lines

    # Escape any other binary characters.  The character is captured so
    # that ord() sees it (there was no capture group for $1 before).
    $text =~ s/([\x00-\x08\x7F-\xFF\e])/sprintf( "&#%d;", ord($1) )/eg;

    return $text;
}
For reverse see.. info/www/client.txt
-------------------------------------------------------------------------------
URL-encode variables and arguments for CGI GET/POST
# url_encode( @strings )
#
# URL-encode each argument: every character other than a word character
# or one of  - . , ! ~ ' ( )  and space is replaced by '%' and its two
# digit lower-case hex code, then spaces are turned into '+'.
# The arguments themselves are left untouched.  Returns the encoded list
# in list context, or just the first encoded string otherwise.
sub url_encode {
    my @encoded = @_;    # work on copies, never on the caller's values

    for my $item (@encoded) {
        $item =~ s/([^-\w.,!~'() ])/sprintf( "%%%02x", ord($1) )/eg;
        $item =~ tr/ /+/;
    }

    return @encoded if wantarray;
    return $encoded[0];
}
# post_arg_encode( name => value, ... )
#
# Build a GET/POST argument string from a flat list of name/value pairs.
# Every element is first passed through url_encode(); the pairs are then
# joined as "name=value" and the pairs separated by '&'.
sub post_arg_encode {
    my @encoded = url_encode(@_);    # encode everything before joining
    my @pairs;

    while (@encoded) {
        my ( $name, $value ) = splice @encoded, 0, 2;
        push @pairs, join( '=', $name, $value );
    }

    return join '&', @pairs;
}
# Example usage: print the name/value pairs as one encoded argument
# string, followed by the original un-encoded list, one item per line.
my @post_args = (
    'filename', '/etc/options',
    'chars',    'ABC-123',
    'punct',    '%^&*+',
    'flag',     '',
    'spaces',   'This has spaces',
);

print post_arg_encode(@post_args), "\n\n";
print join( "\n", @post_args, '' ), "\n";
-------------------------------------------------------------------------------
Decode URL
# decode_url( $url )
#
# Return a copy of $url with every '%XX' hex escape replaced by the
# character with that code.  Unlike CGI string decoding, a '+' is left
# exactly as it is.
sub decode_url {
    my $decoded = shift;

    $decoded =~ s/%([0-9A-Fa-f]{2})/chr( hex $1 )/ge;

    return $decoded;
}
-------------------------------------------------------------------------------
Mime Base64 decode
use MIME::Base64;    # provides decode_base64()
# Decode the base64 text held in $encoded; the result goes into $decoded.
$decoded = decode_base64( $encoded );
===============================================================================
Perl Modules for WWW use...
LWP::Simple
Ultra simple file downloader
# Download the URL in $URL to the file "file.html"
use LWP::Simple;
# $rc receives mirror()'s return value (the HTTP response code,
# according to the LWP::Simple documentation).
$rc = mirror( $URL, "file.html" );
LWP::UserAgent
Perl Light-weight WWW Library
WARNING: the page is only downloaded into memory!!
# Fetch $URL; the page is held in memory inside the response object.
use LWP::UserAgent;
$ua = LWP::UserAgent->new;    # direct method call, not indirect 'new Class'
$return = $ua->get( $URL );
die "GET $URL failed: ", $return->status_line, "\n"
    unless $return->is_success;
OR
# Fetch $URL with an explicit request object; giving request() a file
# name as its second argument saves the content into that file.
use LWP::UserAgent;
$ua = LWP::UserAgent->new;    # direct method call, not indirect 'new Class'
$request = HTTP::Request->new( GET => $URL );
$return = $ua->request( $request, "file.html" );
die "GET $URL failed: ", $return->status_line, "\n"
    unless $return->is_success;
HTTP::Cookies
Handle cookies from a website
# Send the cookies stored in a Netscape format cookie file along with a
# request, saving the fetched content into the file named by $file.
use LWP::UserAgent;
use HTTP::Cookies;               # (the terminating ';' was missing here)
use HTTP::Cookies::Netscape;     # the subclass lives in its own module file
$ua = LWP::UserAgent->new;
$cookie_jar = HTTP::Cookies::Netscape->new(
    File     => "$ENV{HOME}/.netscape/cookies",
    AutoSave => 1 );
$request = HTTP::Request->new( GET => $URL );
# Add a "Cookie:" header for the cookies in the jar that apply to the request
$cookie_jar->add_cookie_header($request);
$return = $ua->request($request, $file);
SKUD/WWW-Automate-0.20
Get your Perl script following links like a human
Eg: $agent->get($url);
$agent->follow("for staff");
$agent->submit;
$agent->back;
Curl::easy
Use the curl library to download files (NOT an OO interface)
Also allows you to control the actual download (e.g. save to a file or
to memory as it is being received), and to do progress monitoring/metering.
# Download $url into the file named by $file using the curl library.
$curl = Curl::easy::init();      # (the terminating ';' was missing here)
Curl::easy::setopt($curl, CURLOPT_TIMEOUT, 120);
Curl::easy::setopt($curl, CURLOPT_URL, $url);
# Three argument open, so characters in $file can not change the open
# mode, and a failure to create the file is reported straight away.
open( FILE, '>', $file ) or die "Unable to write \"$file\": $!\n";
Curl::easy::setopt($curl, CURLOPT_FILE, *FILE);
$return = Curl::easy::perform($curl);   # presumably 0 on success -- check the Curl::easy docs
close( FILE ) or warn "Problem closing \"$file\": $!\n";
Curl::easy::cleanup($curl);
For more detail see my "http_files" script.
MIME::Base64
Encode and decode base64 encoded strings. This includes WWW
username:password headers, as well as mailed files.
-------------------------------------------------------------------------------