# Standard.pm -- Some script primitives for sunir.org # ----------- # February 23, 2001 -- Copyright Sunir Shah. All rights reserved. package Standard; use English; use Exporter; use Time::Local; @ISA = qw( Exporter ); @EXPORT = qw( SayHello Redirect Die ParseQuery CachePath SaveCache LoadCache Cache Fetch FetchRendered FetchDirect FetchMeatball SaveMeatball HTTPPost ConvertTimestamp ParseAccessLogEntry URLEncode UnquoteHTML GetUsemodLog GetUsemodLogList Timestamp Lynx ); @EXPORT_OK = qw(); @EXPORT_TAGS = qw(); $OUTPUT_AUTOFLUSH = 1; # Do not buffer output # Read whole files. Way more efficient for the caching. undef $INPUT_RECORD_SEPARATOR; my $saidHello = 0; # ------------------------------------------------- GLOBALS $CacheDirectory = '/www/sites/sshah/hyper.zoku.net/cache'; $Lynx = '/usr/local/bin/lynx'; # ------------------------------------------------- GLOBALS eval { require LWP::Simple; require LWP::UserAgent; $UserAgent = new LWP::UserAgent( requests_redirectable => [qw(GET HEAD POST)] # for HTTPPost ); $UserAgent->agent( "Sunir's Standard.pm " . $UserAgent->agent); $LWP = 1; }; # SayHello( [content-type] ) # -------------------------- # Send the HTTP header. sub SayHello { my $type = shift || "text/html"; print "Content-type: $type\n\n" unless $saidHello; $saidHello = 1; } # Redirect # -------- # Send the HTTP redirect header. # First paramater is the target URL; sub Redirect { my $url = shift; print "Location: $url\n\n" unless $saidHello; $saidHello = 1; } # Die # --- # Throw a nice, web-friendly error exception. # First argument is an error message. sub Die { my $message = shift; $message =~ s/&/&/g; my $caller = caller; SayHello; print < Error Sorry, the page could not be loaded due to an error. This totally useless error message was thrown at $caller:

$message

If you wish, you could tell Sunir about it. EOF die $message; } # ParseQuery # ---------- # Parses the query from the user agent. If the Query is unknown # it gets the query from $::DefaultQuery. If the action is known, # it gets the action from $::DefaultAction. An action is defined # as action=parameter. Thus, if you get ?parameter, this will be # translated to ?$::DefaultAction=parameter. # # Returns a hash of the query values. sub ParseQuery { my $Query; if( $::ENV{REQUEST_METHOD} eq 'GET' ) { $Query = ($::ENV{QUERY_STRING} || $::DefaultQuery); } elsif( $::ENV{REQUEST_METHOD} eq 'POST' ) { read( STDIN, $Query, $::ENV{CONTENT_LENGTH} ); } # Maybe there's something on the command line for us? elsif( @::ARGV ) { $Query = join '&', @::ARGV; } # Must be testing; read parameters from STDIN. else { $Query = <>; $::ENV{REQUEST_METHOD} = 'POST'; } $Query = "$::DefaultAction=$Query" if !($Query =~ /=/); $Query =~ s/\+/ /g; foreach $_ (split( /&/, $Query )) { s/\%(..)/pack(C, hex($1))/ge; ($_, $Query) = split(/=/, $_, 2 ); $Query{$_} = $Query; } return %Query; } # CachePath( url ) # ----------------- # Returns the path to the given url in the cache sub CachePath { my $url = shift; $url =~ s|http://(.*?)/?$|$1|; $url =~ s|(.*?)@||; # Authentication return "$CacheDirectory/$url"; } # SaveCache( url, contents ) # --------------------------- # Actually stores the $contents of $url in the cache. sub SaveCache { my ($url, $contents) = @_; my $path = CachePath($url); my $CACHE; open CACHE, ">$path" or Die "Could not open cache file $path for writing."; print CACHE $contents; close CACHE; } # LoadCache( url ) # ---------------- # Actually loads the cached version of $url sub LoadCache { my ($url) = @_; open FILE, ('<' . CachePath($url)) or Die "Could not open $url from cache."; my $result = ; close FILE; return $result; } # Cache( url ) # ------------ # Caches the given URL. Returns true on success. sub Cache { my ($url) = @_; my $path = CachePath($url); # Don't recache twice within a week. return if -e $path && -M $path < 7; # Attempt to refetch it. $path =~ /(.*)\//; `mkdir -p "$1"`; my $contents = FetchDirect($url); SaveCache( $url, $contents ) if $contents; # If a cached entry is more than two weeks old, delete it. unlink $path if -M $path > 14; } # Fetch( url ) # ------------ # Grabs the text at the other end of the URL. It will # grab the text from the cache; if the text is not in # the cache, it will be created in the cache. sub Fetch { my ($url) = @_; Cache($url); return LoadCache($url); } # FetchRendered( url ) # -------------------- # Grabs the rendered output from Lynx of a page. sub FetchRendered { my $url = shift; Cache($url); my $path = CachePath($url); return `$Lynx -dump -force_html "$path"` or Die "Could not render $path."; } # FetchDirect( url ) # ------------------ # Gets the text at the other end of the URL, but not # from the cache. sub FetchDirect { my ($url) = @_; return LWP::Simple::get($url) if defined $LWP; my $auth = $1 if $url =~ s|http://(.*?)@|http://|; return `$Lynx -auth="$auth" -source "$url"` if $auth; return `$Lynx -source "$url"`; } # FetchMeatball( LinkPattern ) # ---------------------------- # Grabs the wiki source for LinkPattern off of MeatballWiki sub FetchMeatball { return FetchDirect('http://usemod.com/cgi-bin/mb.pl?action=browse&raw=1&id='.$_[0]); } # SaveMeatball( LinkPattern, contents, [summary] ) # ------------------------------------------------ # Saves $contents onto the page LinkPattern on MeatballWiki sub SaveMeatball { my ($id, $contents, $summary) = @_; $summary = '*' unless defined($summary); $summary = URLEncode($summary); $contents = URLEncode($contents); my $savePage = FetchDirect("http://usemod.com/cgi-bin/mb.pl?action=edit&id=$id"); # This is ugly ugly ugly. my ($oldtime) = $savePage =~ m@name="oldtime".*?value="(.*?)"@; my ($oldconflict) = $savePage =~ m@name="oldconflict".*?value="(.*?)"@; my $post = "oldtime=$oldtime&oldconflict=$oldconflict&title=$id&summary=$summary&Save=Save&text=$contents"; return HTTPPost( 'http://usemod.com/cgi-bin/mb.pl', $post ); } # HTTPPost( url, text ) # --------------------- # HTTP "post"s $text to url. sub HTTPPost { my ($url, $text) = @_; if( $LWP ) { my $request = new HTTP::Request POST => $url; $request->content_type('application/x-www-form-urlencoded'); $request->content($text); my $response = $UserAgent->request($request); Die "HTTP Post failed: " . $response->status_line unless $response->is_success; return $response->content; } # NB: Lynx support is currently different from LWP support. # It will not return the response. my $OUTPUT; open OUTPUT, "| $Lynx -post_data $url" or Die "Could not post to $url: $!"; local $SIG{PIPE} = sub { Die "output pipe broken"; }; print OUTPUT $text; close OUTPUT or Die "bad output pipe: $! $?"; } # ConvertTimestamp( timestamp ) # ------------------------------ # Converts a timestamp from the log file to seconds # since 1 January 1970 0:00 GMT. sub ConvertTimestamp { my $timestamp = shift; $timestamp =~ /(\d{2})\/(\w{3})\/(\d{4})\:(\d{2})\:(\d{2}):(\d{2})\s(\-\d{4})/; my ( $day, $month, $year, $hour, $minute, $second, $timezone ) = ($1, $2, $3, $4, $5, $6, $7); 'JanFebMarAprMayJunJulAugSepOctNovDec' =~ /$month/; $month = length($PREMATCH) / 3; $year -= 1900; $timezone = ($timezone / 100) * 3600; return timegm($second, $minute, $hour, $day, $month, $year) - $timezone; } # ParseAccessLogEntry( line ) # --------------------------- # Breaks out the important information from an Apache access_log entry. # Returns ($ip, $timestamp, $page, $refer, $browser) sub ParseAccessLogEntry { my $line = shift; $line =~ m/ ([^\s]+) # IP address or domain of request ($1) [^\[]* # " - - " ??dunno?? \[(\d{2}\/\w{3}\/\d{4}\:\d{2}\:\d{2}:\d{2}\s.\d{4})\] # timestamp ($2) \s\"\w+\s # Request type (GET | POST) ([^\"\s]+) # Page ($3) .*?\" # HTTP protocol (.*?\" # Bytes received and sent (.+)\" # Referal URL ($5) \s\"([^\"]+)\")? # Browser ($6) /x; return ($1, $2, $3, $5, $6); } # URLEncode( string ) # --------------------- # Returns an application/x-www-form-urlencoded version of string sub URLEncode { my $string = shift; $string =~ s/([^a-zA-Z0-9\$\-\_\@\.\+\!\*\(\)\,])/sprintf("%%%02x",ord($1))/ge; return $string; } # UnquoteHTML( string ) # --------------------- # Decodes character entities. sub UnquoteHTML { my $string = shift; $string =~ s/\<//sg; $string =~ s/\"e?;/"/sg; $string =~ s/\&/&/sg; return $string; } # GetUsemodLogList() # ------------------ # Returns a list of usemod.com log files cached. See GetUsemodLog(). sub GetUsemodLogList { my $logDirectory = Fetch 'http://usemod.com/cgi-bin/logcopy.pl'; my @logs = (); while( $logDirectory =~ /(access\.\d+\.gz)/g ) { push @logs, $1; } # Sort in chronological order, newest first @logs = sort { ($a . $b) =~ /access\.(\d+)\.gzaccess\.(\d+)\.gz/; $2 <=> $1 } @logs; unshift @logs, "access.today"; return @logs; } # GetUsemodLog() # -------------- # Loads the given log file from usemod.com. See GetUsemodLogList(). sub GetUsemodLog { my ($logFilename) = @_; my $log; if( $logFilename eq 'access.today' ) { my $cachePath = CachePath('http://usemod.com/wwwlogs/access.today'); my $size = -s $cachePath; $log = FetchDirect( "http://usemod.com/cgi-bin/logtoday.pl?oldsize=$size" ); open FILE, ">>$cachePath" or Die "Could not append to access.today"; print FILE $log; close FILE; open FILE, "<$cachePath" or Die "Could not open access.today for reading."; $log = ; close FILE; } else { my $logPath = 'http://usemod.com/wwwlogs/' . $logFilename; my $cachedLog = CachePath( $logPath ); if( !-e $cachedLog ) { $log = Fetch( $logPath ); open GZIP, "|gzip -f > $cachedLog" or Die "Could not open pipe to gzip."; print GZIP $log; close GZIP; } $log = `gunzip -c $cachedLog`; } return $log; } # Timestamp() # ----------- # Returns a printable timestamp in two halves. # The first half is the time, the second is the date. sub Timestamp { my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(time()); $mon++; $mon = "0$mon" if $mon < 10; $mday = "0$mday" if $mday < 10; $min = "0$min" if $min < 10; $sec = "0$sec" if $sec < 10; $year += 1900; return ("$hour:$min:$sec", "$mday $mon $year"); } 1;