#!/usr/bin/perl

=head1 NAME

graph-top-referers - produce a PNG graph of the top 10 referers in access_logs

=head1 SYNOPSIS

  graph-top-referers LOGDIR [LOGDIR ...]

=head1 DESCRIPTION

Walks every directory named on the command line and reads each plain file
found there as a web server access log (files whose name ends in C<.gz> are
decompressed through C<gunzip>).

A log line is counted when its status code starts with C<2>, its referer
starts with C<http>, and its request path passes the C<$EXCLUDE_RE> /
C<$REQUIRE_RE> filters.  The C<$MAX_PAGES> most frequently requested paths
are then bucketed over time and written, one gnuplot data set per path, to
F<refgp.dat>.  A gnuplot script is written to F<refgp.run> and C<gnuplot> is
run to render F<refgp.png>.  All three files are created in the current
directory.

=head1 AUTHOR

Justin Mason, jm at jmason dot org

=cut

###########################################################################

use File::Find;
use URI::Escape;
use Time::Local;
use POSIX qw(strftime);

use strict;
use warnings;

# Alternative filter pair, kept for reference:
# my $EXCLUDE_RE = qr{^/(?:[^0-9]|$)}i;
# my $REQUIRE_RE = qr{\.html$}i;
my $EXCLUDE_RE = qr{\.(?:gif|png|css|jpe?g)$}i;
my $REQUIRE_RE = qr{.};

my $MAX_PAGES   = 10;     # number of paths to plot
my $NUM_BUCKETS = 100;    # time slices between the earliest and latest hit

my $DAT_FILE = 'refgp.dat';
my $RUN_FILE = 'refgp.run';
my $PNG_FILE = 'refgp.png';

my %mon2mm = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3,     # note 0 based
    May => 4, Jun => 5, Jul => 6, Aug => 7,
    Sep => 8, Oct => 9, Nov => 10, Dec => 11
);

# Accumulators filled in by wanted().  They must be initialised BEFORE
# File::Find runs: "my %h = ()" is a runtime assignment, so executing it
# after the scan would throw away everything that was collected.
my %count = ();    # request path => number of hits
my %whens = ();    # request path => [ epoch seconds of each hit ]
my $t_earliest;    # smallest hit time seen, undef until the first hit
my $t_latest;      # largest hit time seen, undef until the first hit

# File::Find callback: parse one log file (named by $_) and record every
# qualifying hit in %count / %whens, widening $t_earliest / $t_latest.
# Files that cannot be opened are reported and skipped.
sub wanted {
    return unless -f $_;

    my $in;
    if (/\.gz$/) {
        # List-form pipe open: the file name never passes through a shell.
        unless (open($in, '-|', 'gunzip', '-cd', '--', $_)) {
            warn "cannot run gunzip on $File::Find::name: $!\n";
            return;
        }
    }
    else {
        unless (open($in, '<', $_)) {
            warn "cannot open $File::Find::name: $!\n";
            return;
        }
    }

    # Read into a lexical so File::Find's $_ is left untouched.
    while (my $line = <$in>) {
        # 71.235.13.232 - - [01/Apr/2006:00:01:10 +0100] "GET
        # /wp-content/themes/jmason/images/bg.gif HTTP/1.1" 200 992
        # "http://taint.org/2004/11/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT
        # 5.1; .NET CLR 1.1.4322)"
        my ($when, $tgt, $code, $refr) =
            $line =~ /^\S+ \S+ \S+ \[([^\]]+?)\] "\S+ ([^"]+?) \S+" (\d+) \S+ "([^"]+?)"/
            or next;

        next if ($code !~ /^2/);    # ignore error codes please
        next unless ($refr =~ /^http/);
        next if ($tgt =~ m{$EXCLUDE_RE});
        next unless ($tgt =~ m{$REQUIRE_RE});

        # Skip lines whose timestamp does not parse rather than reusing
        # stale capture groups from the previous match.
        my ($dd, $mon, $yyyy, $hh, $mi, $ss) =
            $when =~ m{^(\d\d)/(...)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) }
            or next;
        next unless exists $mon2mm{$mon};

        # The UTC offset in the log is ignored; the wall-clock fields are
        # converted as if they were GMT.  timegm() croaks on impossible
        # dates, so trap that and skip the line.
        my $epoch = eval { timegm($ss, $mi, $hh, $dd, $mon2mm{$mon}, $yyyy) };
        next unless defined $epoch;

        $t_earliest = $epoch if (!defined $t_earliest || $epoch < $t_earliest);
        $t_latest   = $epoch if (!defined $t_latest   || $epoch > $t_latest);

        $count{$tgt}++;
        push @{ $whens{$tgt} }, $epoch;
    }
    close $in;
    return;
}

die "usage: graph-top-referers LOGDIR [LOGDIR ...]\n" unless @ARGV;

foreach my $dir (@ARGV) {
    File::Find::find(\&wanted, $dir);
}

defined $t_earliest
    or die "graph-top-referers: no matching log lines found\n";

# Width of one bucket in seconds.  Fall back to 1 when every hit shares the
# same timestamp, so the division below cannot be by zero.
my $bucketsize = ($t_latest - $t_earliest) / $NUM_BUCKETS || 1;

# Most-requested paths first; ties broken by name so output is repeatable.
my @top = sort { $count{$b} <=> $count{$a} || $a cmp $b } keys %count;
splice(@top, $MAX_PAGES) if @top > $MAX_PAGES;

# One gnuplot data set per path: "<bucket end time> <hits in bucket>" lines,
# data sets separated by two blank lines so they can be selected by index.
open(my $out, '>', $DAT_FILE) or die "cannot write $DAT_FILE: $!\n";
foreach my $tgt (@top) {
    my @hits = ();
    foreach my $hit (@{ $whens{$tgt} }) {
        $hits[ int(($hit - $t_earliest) / $bucketsize) ]++;
    }

    foreach my $i (0 .. $#hits) {
        my $n = $hits[$i] || 0;    # buckets nobody hit are undef
        my $t = int($t_earliest + ($bucketsize * ($i + 1)));
        print {$out} "$t $n\n";
    }
    print {$out} "\n\n";
}
close($out) or die "error writing $DAT_FILE: $!\n";

open(my $gp, '>', $RUN_FILE) or die "cannot write $RUN_FILE: $!\n";
print {$gp} qq{
set terminal png size 1024,768
set timefmt "%s"
set xdata time
set format x "%04Y%02m%02d"
};

my @plot = ();
foreach my $i (0 .. $#top) {
    # Inside a gnuplot single-quoted string a quote is written as two
    # quotes; without this a path from the log could end the title early.
    (my $leg = $top[$i]) =~ s/'/''/g;
    push @plot, qq{ "$DAT_FILE" using 1:2 index $i }
              . qq{ with linespoints lw 1 title '$leg' };
}
print {$gp} "plot " . join(", ", @plot) . "\n";
close($gp) or die "error writing $RUN_FILE: $!\n";

my $cmd = "gnuplot -persist < $RUN_FILE > $PNG_FILE";
print "[$cmd]\n";
system($cmd) == 0
    or die "gnuplot failed (wait status $?)\n";
exit;