-------------------------------------------------------------------------------
Handling escaped characters in URIs

HTTP (the transfer protocol of the WWW) escapes difficult characters in URIs
like this:  '+' is a space,  and '%' followed by two hex digits is the
character with that code.

Quick Decode...

Decode Query String...

  perl -pe 'tr/+/ /; s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;'

  # Example....
  # Input:  %24argon2i%24v%3D19%24m%3D4096%2Ct%3D3%2Cp%3D1%24%2FFzf
  # Output: $argon2i$v=19$m=4096,t=3,p=1$/Fzf


# ---  Subroutines ---
# Decode one URL-encoded CGI token (a name, a value, or a bare keyword).
#   $arg    - the still-encoded string
#   returns - the decoded string; the caller's variable is left untouched
# The commented-out statements are optional clean-ups, enable whichever
# ones suit the data being handled.
sub clean_arg {
  my($arg) = @_;        # lexical copy (was a 'local' package global)
  $arg =~ tr/+/ /;      # CGI string decoding: '+' is a space. This must be
                        # done first so an escaped '%2B' stays a real '+'
  $arg =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr hex $1/eg;  # %XX -> character

  # $arg =~ s/\r\n?/\n/g;  # clean up end of lines

  # $arg =~ s/^\s+//;     # remove extra spaces at start
  # $arg =~ s/\s+$//;     # remove extra spaces at end
  # $arg =~ s/\s+/ /g;    # remove extra spaces in middle

  # $arg =~ tr/A-Z/a-z/;  # make everything lowercase

  # $arg =~ s/<.*?>//sg;  # Remove all HTML tags (across multiple lines)

  # OR remove just the BAD embedded scripting HTML tags
  # We should actually also remove the text surrounded by the tags too
  # $arg =~ s/<\/?(SCRIPT|OBJECT|APPLET|EMBED).*?>//sig;

  return( $arg );
}

# Split up a CGI variable string ("name=value&name=value...") and store the
# decoded results in the global %FORM hash.
#   $str - the raw, still URL-encoded variable string
# A pair without any '=' is treated as a bare keyword and is stored under
# the key 'search'.  Names and values are decoded AFTER splitting, so an
# escaped '&' or '=' inside them does not confuse the split.
sub do_CGI_string {
  my($str) = @_;
  foreach my $pair ( split(/&/, $str ) ) {
    next if $pair eq '';    # "a=1&&b=2": do not wipe 'search' with nothing

    if ( $pair !~ /=/ ) {   # bare keyword, no name given
      $FORM{'search'} = clean_arg( $pair );
      next;
    }

    # limit of 2: only the first '=' separates name from value
    my($name, $value) = split(/=/, $pair, 2 );
    $FORM{ clean_arg($name) } = clean_arg($value);
    # print clean_arg($name), " = ", clean_arg($value), "<BR>\n";
  }
  return;
}

# --- Decode Arguments ---
# Work out where the CGI variable string comes from, then split it up.
my $is_post = defined $ENV{'REQUEST_METHOD'}
                && $ENV{'REQUEST_METHOD'} eq 'POST';

unless ( $is_post || defined $ENV{'QUERY_STRING'} ) {
  # neither a POST request nor a query string was provided
  #do_CGI_string($ARGV[0]);
  print "ERROR: You are NOT calling via a CGI interface! -- ABORTING\n";
  exit 0;
}

if ( $is_post ) {
  # POST method: CONTENT_LENGTH bytes of variable string are on STDIN
  read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
  do_CGI_string($buffer);
}
else {
  # a direct url link (normal method of usage)
  do_CGI_string($ENV{'QUERY_STRING'});
}


-------------------------------------------------------------------------------
Output plain text in HTML format

# Convert plain text into HTML that displays the same way when it is NOT
# placed inside a <PRE>..</PRE> block.
#   $text   - the plain text, possibly several lines
#   returns - the HTML fragment
sub htmlize_text {
  my($text) = @_;

  # Expand tabs to 8 column tab stops (if text is NOT in a <PRE>..</PRE>).
  # The column is measured from the start of the current line ('^' with /m),
  # so tabs on second and later lines also land on the correct stop.  Each
  # pass expands the first run of tabs on every line; loop until none remain.
  1 while $text =~
    s/^([^\t\n]*)(\t+)/$1 . ' ' x ( length($2)*8 - length($1)%8 )/gme;

  # specific characters -- '&' must go first, or the entities added by the
  # later substitutions would themselves be escaped
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;

  # do the following if text is NOT in a <PRE>..</PRE>
  $text =~ s/ /&nbsp;/g;          # replace all spaces (including tabs)
  $text =~ s/\r?\n/<BR>\n/g;      # replace end of lines (LF or CRLF)

  # Escape any other binary characters as numeric character references.
  # The character has to be captured: without the parentheses $1 is not
  # set by this match and every character was output as code 0.
  $text =~ s/([\x00-\x08\x7F-\xFF\e])/sprintf("&#%d;", ord($1))/eg;

  return $text;
}

For the reverse operation see: info/www/client.txt

-------------------------------------------------------------------------------
URL encode variables and arguments for CGI get/post

# URL encode one or more strings for use in a CGI get/post request.
#   @_      - the strings to encode (they are copied, never modified)
#   returns - list context: all the encoded strings, in order
#             scalar context: just the first encoded string
# Everything except ASCII letters, digits, '_' and  - . , ! ~ ' ( )  becomes
# %xx (lowercase hex), and a space becomes '+'.
# NOTE: characters above 0xFF produce more than two hex digits; encode such
# strings to bytes (for example UTF-8) before calling.
sub url_encode {
  my @a = @_;  # make a copy of the arguments
  for my $string (@a) {
    # An explicit ASCII class is used rather than \w, as \w also matches
    # accented letters in character strings, leaving them unencoded.
    # Space is excluded here so that it can be turned into '+' below.
    $string =~ s/([^-A-Za-z0-9_.,!~'() ])/sprintf('%%%02x', ord $1)/eg;
    $string =~ tr/ /+/;
  }
  return wantarray ? @a : $a[0];
}

# Build a "name=value&name=value..." string from a flat list of
# name, value, name, value, ...   Every item is URL encoded before the
# pairs are joined, so '=' and '&' inside the data stay unambiguous.
sub post_arg_encode {
  my @encoded = url_encode(@_);   # encode all post arguments before joining
  my @pairs;
  while ( @encoded ) {
    my $name  = shift @encoded;
    my $value = shift @encoded;
    push @pairs, join('=', $name, $value);
  }
  return join('&', @pairs);
}

# Example usage

# The arguments to send, as a flat list of:  name => value
my @post_args = (
  'filename' => '/etc/options',
  'chars'    => 'ABC-123',
  'punct'    => '%^&*+',
  'flag'     => '',
  'spaces'   => 'This has spaces',
);

# First the encoded request string, which for the above is...
#   filename=%2fetc%2foptions&chars=ABC-123&punct=%25%5e%26%2a%2b&flag=&spaces=This+has+spaces
my $encoded = post_arg_encode(@post_args);
print "$encoded\n\n";

# ...then the original list, one item per line, to show it was not changed
print "$_\n"  for @post_args;
print "\n";

-------------------------------------------------------------------------------
Decode URL

# Replace every %XX escape (two hex digits, either case) in a URL with the
# character it stands for.  A '+' is left alone, and a '%' that is not
# followed by two hex digits is kept as it is.
sub decode_url {
  my $url = shift;
  $url =~ s{ % ( [0-9A-Fa-f]{2} ) }{ chr( hex($1) ) }gex;
  return $url;
}

-------------------------------------------------------------------------------
Mime Base64 decode

use MIME::Base64;

# $encoded holds the base64 text; decode_base64() comes from MIME::Base64
# and returns the decoded data
$decoded = decode_base64( $encoded );

===============================================================================
Perl Modules for WWW use...

  LWP::Simple
             Ultra simple file downloader
               # Download URL to the file "file.html"
               # $rc receives the value returned by mirror()
               # (presumably the HTTP status code -- see the LWP::Simple docs)
               use LWP::Simple;
               $rc = mirror( $URL, "file.html" );

  LWP::UserAgent
             Perl Light-weight WWW Library
             WARNING: the page is only downloaded into memory!!
                  use LWP::UserAgent;
                  $ua = LWP::UserAgent->new;
                  # the page is then held in memory, in $return->content
                  $return = $ua->get( $URL );
                  die $return->status_line  unless $return->is_success;
             OR
                  use LWP::UserAgent;
                  $ua = LWP::UserAgent->new;
                  $request = HTTP::Request->new( GET => $URL );
                  # a file name as second argument saves the page to that file
                  $return = $ua->request($request, "file.html" );
                  die $return->status_line  unless $return->is_success;

  HTTP::Cookies
             Handle cookies from a website
               use LWP::UserAgent;
               use HTTP::Cookies;
               # NOTE(review): the Netscape format jar appears to live in its
               # own module file, so load it explicitly -- confirm
               use HTTP::Cookies::Netscape;
               $ua = LWP::UserAgent->new;
               $cookie_jar = HTTP::Cookies::Netscape->new(
                               File     => "$ENV{HOME}/.netscape/cookies",
                               AutoSave => 1 );
               $request = HTTP::Request->new( GET => $URL );
               # add the matching stored cookies to the outgoing request
               $cookie_jar->add_cookie_header($request);
               $return = $ua->request($request, $file);

  SKUD/WWW-Automate-0.20
             Get your Perl script following links like a human
             Eg:   $agent->get($url);
                   $agent->follow("for staff");
                   $agent->submit;
                   $agent->back;

  Curl::easy
             Use the curl library to download files (NOT an OO interface).
             Also allows you to control the actual download (e.g. save to file
             or memory as it is being received), and do a progress monitor/meter.
                 $curl = Curl::easy::init();
                 Curl::easy::setopt($curl, CURLOPT_TIMEOUT, 120);
                 Curl::easy::setopt($curl, CURLOPT_URL, $url);
                 # 3 argument open, so an odd file name cannot change the mode
                 open( FILE, '>', $file )  or die "Unable to write $file: $!\n";
                 Curl::easy::setopt($curl, CURLOPT_FILE, *FILE);
                 $return = Curl::easy::perform($curl);
                 # buffered write errors only show up when the file is closed
                 close( FILE )  or die "Failed writing $file: $!\n";
                 Curl::easy::cleanup($curl);
             For more detail see my "http_files" script.

  MIME::Base64
             Encode and decode base64 encoded strings. This includes www
             username:password headers, as well as mailed files.

-------------------------------------------------------------------------------