#!/usr/bin/perl
#
# log-tail, (c) Andrew Daviel
# Vancouver Webpages, 1996
# http://vancouver-webpages.com/proxy/log-tail.pl
# You may do what you like with this except say that you wrote it
# or sell it for profit.
#
# This program is invoked as a CGI script via a
# URL such as http://somewhere.org/cgi-bin/log-tail/error_log
# Although it does print out the last entries in logfiles, its
# primary purpose is to demonstrate a method of generating
# a cacheable CGI script.
# Modern browsers use local cache extensively, and a multi-level
# national and international cache hierarchy is being developed
# (see http://www.nlanr.net/Cache/, http://vancouver-webpages.com/proxy/
# for more information).
# Where objects can be cached the response time is greatly improved,
# which is why Netscape is better than the original Mosaic over
# an analog modem. Hierarchical cache allows popular documents to be
# cached nearby, so that they may be accessed at LAN speeds instead of
# the (sometimes pitiful) national/International Internet speeds
#
# How documents are cached depends on the HTTP headers (see the
# HTTP specification at ftp://ds.internic.net or http://www.w3.org)
# It would be possible to create a document whose content depended
# solely on its URL, which could be cached indefinitely and would
# never expire (something like cgi-bin/add?2+3 giving "5"), but most
# Web pages are generated from a file; the modification time of the file
# is used to generate a Last-Modified header. The GET If-Modified-Since
# mechanism used by Netscape allows the browser to quickly check that
# its locally cached pages and images are up-to-date; where these
# are generated by a CGI script, usually the Last-Modified header is missing
# and a full GET is required, slowing the response time and consuming
# long-distance bandwidth.
# This script demonstrates a technique for generating HTTP headers to
# allow CGI documents to be sensibly cached.
# load the HTTP date routines
$libloc = "/usr/local/lib/libwww-perl-0.40" ;
unshift(@INC, $libloc);
require "wwwdates.pl" ;

# Given a URL such as "http://somewhere/cgi-bin/log-tail?error_log",
# this code redirects it to "http://somewhere/cgi-bin/log-tail/error_log"
# (or perhaps "http://somewhere/cache-cgi/log-tail/error_log").
# This may be used to create a response URL cacheable by the
# default configuration of the popular Squid proxy-cache agent.
# The original URL with a query term (?)
# might be generated by an HTML form, while the redirected URL can
# be bookmarked and cached, with proper handling of If-Modified-Since.
# This kind of technique can be used to make a hit counter for an image -
# the image remains cacheable and the redirect (a few hundred bytes) is counted.

$path  = $ENV{'PATH_INFO'} ;
$query = $ENV{'QUERY_STRING'};

if ($query && !$path) {
    # The query ends up inside a response header, so make sure it cannot
    # contain a line break. (It should arrive URL-encoded; this is only a
    # safety net.)
    $query =~ tr/\r\n//d ;

    # Figure out our host name. Prefer the name supplied by the web server
    # (CGI SERVER_NAME); fall back to asking the system. Hard code it if
    # required.
    $host = $ENV{'SERVER_NAME'} ;
    unless ($host) {
        $host = `hostname` ;
        # chomp, not chop: only remove a trailing newline, never a real
        # character of the name.
        chomp($host) ;
    }

    print "Content-type: text/html\nLocation: http://$host/cgi-bin/log-tail/$query\n\n";
    exit ;
}

# check that there really is an argument to the script
# An illegal expiry date of "0" ensures that this response is not cached
unless ($path) {
    print "Content-type: text/plain\nExpires: 0\n\n";
    print "You must give a filename in the path, e.g. cgi-bin/log-tail/error_log\n";
    exit ;
}

# Get the value (if any) of the If-Modified-Since header. Netscape operates
# as follows (depending on the cache options set):
#
#   Operation      Cached locally   HTTP transaction
#   follow link    No               GET
#   follow link    Yes              none (use locally cached copy)
#   Reload         No               GET
#   Reload         Yes              GET + If-Modified-Since + pragma: no-cache
#   Shift-Reload   Yes              GET + pragma: no-cache
#
# Netscape sends an IMS value such as "Friday, 20-Sep-96 07:26:24 GMT; length=1539"
# i.e. it appends the locally cached length. This script ignores the
# length, but must strip it for the date routines.
# The IMS value is converted to an HTTP time value (RFC1123)
# NOTE(review): the result is compared numerically with stat()'s mtime below,
# so get_gmtime presumably returns seconds since the epoch - confirm against
# wwwdates.pl.
$imsd = $ENV{'HTTP_IF_MODIFIED_SINCE'} ;
$ims  = '' ;
if (defined($imsd) && $imsd ne '') {
    # Only parse when a header was actually sent, so that an absent header
    # can never be mistaken for a date.
    $ims = $imsd ;
    $ims =~ s/;.*// ;                       # for Netscape
    $ims = wwwdates::get_gmtime($ims) ;
}

# Handy time spans in seconds (not used further in this script).
$day  = 24*60*60 ;
$week = $day*7 ;
$hr8  = 8*60*60 ;

# The path comes straight from the URL, so it must not be able to leave the
# log directory or carry shell metacharacters. Accept only "/name" components
# made of letters, digits, "_", "." and "-", none starting with "." or "-"
# (which also rules out "." and "..").
unless (defined($path) && $path =~ m{\A(?:/[A-Za-z0-9_][A-Za-z0-9_.\-]*)+\z}) {
    print "Status: 400 Bad Request\nContent-type: text/plain\nExpires: 0\n\n";
    print "Illegal log file name\n";
    exit ;
}

# add the argument (file name) to the logfile root
$file = "/usr/local/etc/httpd/logs".$path ;

# get the modification time of the file
# For a database lookup, it may be possible to assign a modification
# time to an individual key. Otherwise the modification time of the
# database may be given.
# For a data acquisition application, such as a Webcam or rain gauge, it
# may be possible to assign an arbitrary modification time based on the rate
# of change of the quantity being measured, perhaps 10 minutes in the past
# for the Webcam. The raingauge might register a change for every millimeter
# of rainfall. Remember that a new current reading may be obtained by
# doing a Reload (i.e. GET without IMS).
@stat = stat($file) ;

# A missing or non-regular file has no meaningful modification time; say so
# instead of carrying on with the script's own timestamp.
unless (@stat && -f _) {
    print "Status: 404 Not Found\nContent-type: text/plain\nExpires: 0\n\n";
    print "No such log file\n";
    exit ;
}

$size    = $stat[7];
$modtime = $stat[9] ;

# also take note of the modification time of the script itself
@stat = stat($0) ;
$mymodtime = $stat[9] ;
if ($mymodtime > $modtime) { $modtime = $mymodtime ; }

# If the file has not been modified since the IMS, return a code 304
# "not modified". This syntax works with the later Apache servers.
if ($ims && $modtime <= $ims) {
    print "Status: 304 Not Modified\n\n" ;
    exit ;
}

# For demonstration purposes, assign an expiry date 5 minutes in the future.
# For a real application, perhaps you know when the database will be
# updated and can generate an appropriate value. In the absence of an Expires
# date, cache agents guess an expiry date based on how recently the document
# was modified, the type of document, etc. - a GIF that hasn't changed in
# 3 months might be cached for a month, though as soon as someone does
# a "reload" it will be updated in cache.
# Sample the clock once so that the Date and Expires values are consistent.
$now     = time ;
$expires = $now + 5*60 ; # expire in 5 minutes

# Convert expiry and last-modified times to RFC1123 time
# (the GMT forms must be computed before $expires is overwritten with its
# local-time string).
$gexpires = wwwdates::wtime($expires,'GMT');
$glastmod = wwwdates::wtime($modtime ,'GMT');
$expires  = wwwdates::wtime($expires,'');
$lastmod  = wwwdates::wtime($modtime ,'');
$date     = wwwdates::wtime($now,'') ;
$gdate    = wwwdates::wtime($now,'GMT') ;

# The path, file name and If-Modified-Since header all originate from the
# request and are echoed into a text/html body, so escape HTML
# metacharacters in the copies used for display. The original variables are
# left untouched.
($hpath, $hfile, $himsd) = ($path, $file, $imsd) ;
foreach $field ($hpath, $hfile, $himsd) {
    $field = '' unless defined($field) ;
    $field =~ s/&/&amp;/g ;     # must be first
    $field =~ s/</&lt;/g ;
    $field =~ s/>/&gt;/g ;
    $field =~ s/"/&quot;/g ;
}

# This script generates a valid Content-length header.
# This could be used to check against the Netscape IMS value,
# but is otherwise not much used. Certain cache agents may
# refuse to cache documents larger than a certain size.
# Since the content-length is also included in the document
# body, a dummy value is used, then overwritten by the real value.
# A link to the Netscape "about:document" URL is provided
# for convenience. In a real application the HTTP headers would
# not be given in the body (and only GMT values need be calculated)
$head = "Last 10 entries in logs$hpath

Last 10 entries in logs$hpath

A demonstration of a cacheable CGI script.
Andrew Daviel, Vancouver Webpages

about:document - about this document (in Netscape)

Content-type: text/html
Content-Length: xxxxxxx
Expires: $gexpires
Expires: $expires
Last-Modified: $glastmod
Last-Modified: $lastmod
Date: $gdate
Date: $date
If-Modified-Since: $himsd

Last 10 entries in $hfile

" ;
# generate the tail of the file given as an argument, add it to the
# header and calculate the length.

# Run tail without a shell (list-form pipe open): the file name is derived
# from the request URL and must never be interpreted as shell syntax.
# "-n 10" is the POSIX spelling of the historical "-10".
$tail = '' ;
if (open(my $tail_fh, '-|', 'tail', '-n', '10', '--', $file)) {
    local $/ ;                  # read everything tail prints in one go
    $tail = <$tail_fh> ;
    close($tail_fh) ;
}
$tail = '' unless defined($tail) ;

# Log lines can contain text supplied by remote clients; escape it so it is
# displayed rather than interpreted by the browser.
$tail =~ s/&/&amp;/g ;          # must be first
$tail =~ s/</&lt;/g ;
$tail =~ s/>/&gt;/g ;

# The placeholder and its replacement are both 7 characters wide, so the
# length measured with the placeholder in place is the final length.
$doc     = $head.$tail."\n" ;
$length  = length($doc) ;
$slength = substr($length."       ",0,7) ;

# Anchor on the label so that an "xxxxxxx" occurring earlier in the text
# (e.g. in the file name) is not replaced by mistake.
$head =~ s/(Content-Length: )xxxxxxx/$1$slength/ ;
$doc = $head.$tail."\n" ;

# Generate the complete HTTP response for a successful GET
print "Content-type: text/html
Content-Length: $length
Expires: $gexpires
Last-Modified: $glastmod

$doc";