# Standard.pm -- Some script primitives for sunir.org # ----------- # February 23, 2001 -- Copyright Sunir Shah. All rights reserved. package Standard; use English; use Exporter; use Time::Local; @ISA = qw( Exporter ); @EXPORT = qw( SayHello Redirect Die ParseQuery CachePath Cache Fetch FetchRendered FetchDirect ConvertTimestamp ParseAccessLogEntry UnquoteHTML GetUsemodLog GetUsemodLogList Lynx ); @EXPORT_OK = qw(); @EXPORT_TAGS = qw(); $OUTPUT_AUTOFLUSH = 1; # Do not buffer output # Read whole files. Way more efficient for the caching. undef $INPUT_RECORD_SEPARATOR; my $saidHello = 0; # ------------------------------------------------- GLOBALS $CacheDirectory = '/www/sites/sshah/hyper.zoku.net/cache'; $Lynx = '/usr/local/bin/lynx'; # ------------------------------------------------- GLOBALS eval { require LWP::Simple; $LWP = 1; }; # SayHello( [content-type] ) # -------------------------- # Send the HTTP header. sub SayHello { my $type = shift || "text/html"; print "Content-type: $type\n\n" unless $saidHello; $saidHello = 1; } # Redirect # -------- # Send the HTTP redirect header. # First paramater is the target URL; sub Redirect { my $url = shift; print "Location: $url\n\n" unless $saidHello; $saidHello = 1; } # Die # --- # Throw a nice, web-friendly error exception. # First argument is an error message. sub Die { my $message = shift; $message =~ s/&/&/g; SayHello; print < Error Sorry, the page could not be loaded due to an error. This totally useless error message was thrown:

$message

If you wish, you could tell Sunir about it. EOF die $message; } # ParseQuery # ---------- # Parses the query from the user agent. If the Query is unknown # it gets the query from $::DefaultQuery. If the action is known, # it gets the action from $::DefaultAction. An action is defined # as action=parameter. Thus, if you get ?parameter, this will be # translated to ?$::DefaultAction=parameter. # # Returns a hash of the query values. sub ParseQuery { my $Query; if( $::ENV{REQUEST_METHOD} eq 'GET' ) { $Query = ($::ENV{QUERY_STRING} || $::DefaultQuery); } elsif( $::ENV{REQUEST_METHOD} eq 'POST' ) { read( STDIN, $Query, $::ENV{CONTENT_LENGTH} ); } # Maybe there's something on the command line for us? elsif( @::ARGV ) { $Query = join '&', @::ARGV; } # Must be testing; read parameters from STDIN. else { $Query = <>; $::ENV{REQUEST_METHOD} = 'POST'; } $Query = "$::DefaultAction=$Query" if !($Query =~ /=/); $Query =~ s/\+/ /g; foreach $_ (split( /&/, $Query )) { s/\%(..)/pack(C, hex($1))/ge; ($_, $Query) = split(/=/, $_, 2 ); $Query{$_} = $Query; } return %Query; } # CachePath( url ) # ----------------- # Returns the path to the given url in the cache sub CachePath { my $url = shift; $url =~ m|http://(.*?)/?$|; return "$CacheDirectory/$1"; } # Cache( url ) # ------------ # Caches the given URL. Returns true on success. sub Cache { my $url = shift; my $path = CachePath($url); if( !-e $path ) { $path =~ /(.*)\//; `mkdir -p "$1"`; `$Lynx -source "$url" > "$path"`; #or Die "Could not cache $url at $path"; } } # Fetch( url ) # ------------ # Grabs the text at the other end of the URL. It will # grab the text from the cache; if the text is not in # the cache, it will be created in the cache. sub Fetch { my $url = shift; Cache($url); open FILE, ('<' . CachePath($url)) or Die "Could not open $url from cache."; my $result = ; close FILE; return $result; } # FetchRendered( url ) # -------------------- # Grabs the rendered output from Lynx of a page. sub FetchRendered { my $url = shift; Cache($url); my $path = CachePath($url); return `$Lynx -dump -force_html "$path"` or Die "Could not render $path."; } # FetchDirect( url ) # ------------------ # Gets the text at the other end of the URL, but not # from the cache. sub FetchDirect { my $url = shift; return LWP::Simple::get($url) if defined $LWP; return `$Lynx -source "$url"`; } # ConvertTimestamp( timestamp ) # ------------------------------ # Converts a timestamp from the log file to seconds # since 1 January 1970 0:00 GMT. sub ConvertTimestamp { my $timestamp = shift; $timestamp =~ /(\d{2})\/(\w{3})\/(\d{4})\:(\d{2})\:(\d{2}):(\d{2})\s(\-\d{4})/; my ( $day, $month, $year, $hour, $minute, $second, $timezone ) = ($1, $2, $3, $4, $5, $6, $7); 'JanFebMarAprMayJunJulAugSepOctNovDec' =~ /$month/; $month = length($PREMATCH) / 3; $year -= 1900; $timezone = ($timezone / 100) * 3600; return timegm($second, $minute, $hour, $day, $month, $year) - $timezone; } # ParseAccessLogEntry( line ) # --------------------------- # Breaks out the important information from an Apache access_log entry. # Returns ($ip, $timestamp, $page, $refer, $browser) sub ParseAccessLogEntry { my $line = shift; $line =~ m/ ([^\s]+) # IP address or domain of request ($1) [^\[]* # " - - " ??dunno?? \[(\d{2}\/\w{3}\/\d{4}\:\d{2}\:\d{2}:\d{2}\s.\d{4})\] # timestamp ($2) \s\"\w+\s # Request type (GET | POST) ([^\"\s]+) # Page ($3) .*?\" # HTTP protocol (.*?\" # Bytes received and sent (.+)\" # Referal URL ($5) \s\"([^\"]+)\")? # Browser ($6) /x; return ($1, $2, $3, $5, $6); } # UnquoteHTML( string ) # --------------------- # Decodes character entities. sub UnquoteHTML { my $string = shift; $string =~ s/\<//sg; $string =~ s/\"e?;/"/sg; $string =~ s/\&/&/sg; return $string; } # GetUsemodLogList() # ------------------ # Returns a list of usemod.com log files cached. See GetUsemodLog(). sub GetUsemodLogList { my $logDirectory = Fetch 'http://usemod.com/cgi-bin/logcopy.pl'; my @logs = (); while( $logDirectory =~ /(access\.\d+\.gz)/g ) { push @logs, $1; } # Sort in chronological order, newest first @logs = sort { ($a . $b) =~ /access\.(\d+)\.gzaccess\.(\d+)\.gz/; $2 <=> $1 } @logs; unshift @logs, "access.today"; return @logs; } # GetUsemodLog() # -------------- # Loads the given log file from usemod.com. See GetUsemodLogList(). sub GetUsemodLog { my ($logFilename) = @_; my $log; if( $logFilename eq 'access.today' ) { my $cachePath = CachePath('http://usemod.com/wwwlogs/access.today'); my $size = -s $cachePath; $log = FetchDirect( "http://usemod.com/cgi-bin/logtoday.pl?oldsize=$size" ); open FILE, ">>$cachePath" or Die "Could not append to access.today"; print FILE $log; close FILE; open FILE, "<$cachePath" or Die "Could not open access.today for reading."; $log = ; close FILE; } else { my $logPath = 'http://usemod.com/wwwlogs/' . $logFilename; my $cachedLog = CachePath( $logPath ); if( !-e $cachedLog ) { $log = Fetch( $logPath ); open GZIP, "|gzip -f > $cachedLog" or Die "Could not open pipe to gzip."; print GZIP $log; close GZIP; } $log = `gunzip -c $cachedLog`; } return $log; } 1;