#!/usr/bin/perl -w ################## # # hiplink.pl -- 856 logging script for HIP 3 # # see http://www.daveyp.com/blog/index.php/archives/111/ # and also http://benostrowsky.wordpress.com/2007/04/26/improving-daveyps-hip-link-tracker/ # ############# # # Knocked together by Dave Pattern # If you spot any bugs, please let me know! # # Modified by Ben Ostrowsky # ditto! # ############# # # version 1.00 - last updated 29/Sep/2006 # version 1.10 - last updated 26/Apr/2007 # # ######### # # (cc) 2006, 2007 # # http://creativecommons.org/licenses/by-nc-sa/2.5/ # ##### use strict; use POSIX qw(strftime); my $bib = 0; my $ttl = ''; my $profile = ''; ### HEY KIDS, WHAT TIME IS IT? my $now = time(); my $tz = strftime("%z", localtime($now)); $tz =~ s/(\d{2})(\d{2})/$1:$2/; # all hail the merciful ISO 8601 or be crushed by its boot of vengeance my $timestamp = strftime("%Y-%m-%dT%H:%M:%S", localtime($now)) . $tz; ### GET THE 856 URL BEING PASSED TO THE SCRIPT... my $url = $ENV{QUERY_STRING} || ''; ### TRY AND GET THE URL OF THE REFERING PAGE... my $ref = $ENV{HTTP_REFERER} || ''; ### WHAT HOST REFERRED US? my $pac = $ref; $pac =~ s/^.+?:\/\/(.+?)\/.*$/$1/; ### FQDN OF YOUR HIP SERVER... OR A REGEX PATTERN... my $hip = 'suncat2.tblc.org|ipacserver.tblc.org'; ### THE LOG FILE... my $log = '/var/log/httpd/hiplink_log'; ### GET THE USER'S IP ADDRESS (THE REAL ONE) my $ipaddr = $ENV{HTTP_X_FORWARDED_FOR} || $ENV{REMOTE_ADDR} || ''; ### IF WE'VE NOT GOT AN 856 URL, THEN RETURN AN HTTP STATUS 204 (NO ACTION)... unless( $url ) { print "status: 204\n\n"; exit; } ### IF THE REFERING PAGE IS A HIP PAGE, THEN TRY AND GET THE BIB NUMBER AND TITLE... if( $ref && $ref =~ /$hip/ ) { ( $bib, $ttl, $profile ) = getBib( $ref ) } ### REDIRECT THE USER'S WEB BROWSER TO THE 856 URL... print "Location: $url\n\n"; ### "LOG IT!" (TO BE SHOUTED IN THE SAME WAY WITHNAIL SHOUTS "FORK IT!") open( OUT, ">>$log" ); print OUT "$timestamp\t$ipaddr\t$pac\t$profile\t$bib\t$ttl\t$url\n"; close( OUT ); # # Log format: # Each line, tab-separated, contains: # timestamp (ISO 8601) # ip_address (of the requester) # hip_hostname (since some people have multiple HIP servers) # profile code # bib# # title of bib # url # # Sample output: # # 2007-04-26T19:38:05-04:00 64.128.136.8 ipacserver.tblc.org fdl 1052760 Test record. http://benostrowsky.tblc.org/ # ### TO GET THE BIB NUMBER, TITLE, ETC., WE TAKE THE REFERING URL AND TWEAK IT TO FETCH THE XML VERSION OF THE HIP PAGE... sub getBib { my $url = shift; my $bib = 0; my $ttl = ''; my $profile = ''; if( $url =~ /^http/ ) { use LWP::Simple; $url =~ s/\#.*$//; $url .= '&GetXML=true'; my $content = get( $url ); if( $content =~ /\(\d\d*)\<\/key\>/i ) { $bib = $1; } if( $content =~ /\\\(.+?)\<\/text\>/i ) { $ttl = $1; } if( $content =~ /\(.+?)\<\/profile\>/i ) { $profile = $1; } } return( $bib, $ttl, $profile ); }