#! /usr/bin/perl -w # Copyright (c) 1999 University of Cambridge. # See the file NOTICE for conditions of use and distribution. # Perl script to generate statistics from one or more Exim log files. # Usage: eximstats [] ... # 1996-05-21: Ignore lines not starting with valid date/time, just in case # these get into a log file. # 1996-11-19: Add the -h option to control the size of the histogram, # and optionally turn it off. # Use some Perl 5 things; it should be everywhere by now. # Add the Perl -w option and rewrite so no warnings are given. # Add the -t option to control the length of the "top" listing. # Add the -ne, -nt options to turn off errors and transport # information. # Add information about length of time on queue, and -q to # control the intervals and turn it off. # Add count and percentage of delayed messages to the Received # line. # Show total number of errors. # Add count and percentage of messages with errors to Received # line. # Add information about relaying and -nr to suppress it. # 1997-02-03 Merged in some of the things Nigel Metheringham had done: # Re-worded headings # Added received histogram as well as delivered # Added local senders' league table # Added local recipients' league table # 1997-03-10 Fixed typo "destinationss" # Allow for intermediate address between final and original # when testing for relaying # Give better message when no input # 1997-04-24 Fixed bug in layout of error listing that was depending on # text length (output line got repeated). # 1997-05-06 Bug in option decoding when only one option. # Overflow bug when handling very large volumes. # 1997-10-28 Updated to handle revised log format that might show # HELO name as well as host name before IP number # 1998-01-26 Bugs in the function for calculating the number of seconds # since 1970 from a log date # 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log # line; cope with this, thereby avoiding undefined problems # Very short log line gave substring error # 1998-02-03 A routed delivery to a local transport may not have <> in the # log line; terminate the address at white space, not < # 1998-09-07 If first line of input was a => line, $thissize was undefined; # ensure it is zero. # 1998-12-21 Adding of $thissize from => line should have been adding $size. # Oops. Should have looked more closely when fixing the previous # bug! # 1999-11-12 Increased the field widths for printed integers; numbers are # bigger than originally envisaged. use integer; ################################################## # Static data # ################################################## @tab62 = (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9 0,10,11,12,13,14,15,16,17,18,19,20, # A-K 21,22,23,24,25,26,27,28,29,30,31,32, # L-W 33,34,35, 0, 0, 0, 0, 0, # X-Z 0,36,37,38,39,40,41,42,43,44,45,46, # a-k 47,48,49,50,51,52,53,54,55,56,57,58, # l-w 59,60,61); # x-z @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334); @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60, 12*60*60, 24*60*60); ################################################## # Subroutines # ################################################## # For handling very large volumes, the number of gigabytes is # passed as a separate argument. sub print_volume_rounded { my($g) = pop @_; my($x) = pop @_; while ($x > $gig) { $g++; $x -= $gig; } # Values < 1 GB if ($g <= 0) { if ($x < 10000) { printf("%6d", $x); } elsif ($x < 10000000) { printf("%4dKB", ($x + 512)/1024); } else { printf("%4dMB", ($x + 512*1024)/(1024*1024)); } } # Values between 1GB and 10GB are printed in MB elsif ($g < 10) { printf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024)); } # Handle values over 10GB else { printf("%4dGB", $g + ($x + $gig/2)/$gig); } } sub format_time { my($t) = pop @_; my($s) = $t % 60; $t /= 60; my($m) = $t % 60; $t /= 60; my($h) = $t % 24; $t /= 24; my($d) = $t % 7; my($w) = $t/7; my($p) = ""; $p .= "$w"."w" if $w > 0; $p .= "$d"."d" if $d > 0; $p .= "$h"."h" if $h > 0; $p .= "$m"."m" if $m > 0; $p .= "$s"."s" if $s > 0 || $p eq ""; $p; } # Given a log date/time, compute seconds since 1970 sub seconds { my($y,$mo,$d,$h,$mi,$s) = (pop @_) =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/; my($leap) = ($y % 4)? 0 : 1; $d += $days_per_month[$mo-1]; $d-- unless ($leap && $mo > 2); $y -= 1970; $d += $y * 365 + ($y+1)/4; # NB This relies on use integer. $s += 60 * ($mi + 60 * ($h + $d * 24)); $s; } # Given a message id, compute seconds sub id_seconds { my($id) = substr((pop @_), 0, 6); my($s) = 0; my(@c) = split(//, $id); while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] } $s; } # Print time on queue information sub print_queue_times { no integer; my($string,$array) = @_; $printed_one = 0; $cumulative_percent = 0; $queue_unknown += keys %arrival_time; $queue_total = $queue_more_than; for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] } my($temp) = "Time spent on the queue: $string"; printf ("%s\n%s\n\n", $temp, "-" x length($temp)); for ($i = 0; $i <= $#queue_times; $i++) { if ($$array[$i] > 0) { $percent = ($$array[$i] * 100)/$queue_total; $cumulative_percent += $percent; printf("%s %4s %6d %5.1f%% %5.1f%%\n", $printed_one? " " : "Under", &format_time($queue_times[$i]), $$array[$i], $percent, $cumulative_percent); $printed_one = 1; } } if ($queue_more_than > 0) { $percent = ($queue_more_than * 100)/$queue_total; $cumulative_percent += $percent; printf("Over %4s %6d %5.1f%% %5.1f%%\n", &format_time($queue_times[$#queue_times]), $queue_more_than, $percent, $cumulative_percent); } #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0; print "\n"; } # Print histogram sub print_histogram { my($text) = shift; my(@interval_count) = @_; my($maxd) = 0; for ($i = 0; $i < $hist_number; $i++) { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; } $scale = int(($maxd + 25)/50); $scale = 1 if $scale == 0; if ($text eq "Deliveries") { $type = ($scale == 1)? "delivery" : "deliveries"; } else { $type = ($scale == 1)? "message" : "messages"; } my($temp) = sprintf("$text per %s (each dot is $scale $type)", ($hist_interval == 60)? "hour" : ($hist_interval == 1)? "minute" : "$hist_interval minutes"); printf("%s\n%s\n\n", $temp, "-" x length($temp)); $hour = 0; $minutes = 0; for ($i = 0; $i < $hist_number; $i++) { $c = $interval_count[$i]; # If the interval is an hour (the maximum) print the starting and # ending hours as a label. Otherwise print the starting hour and # minutes, which take up the same space. if ($hist_opt == 1) { printf("%02d-%02d", $hour, $hour + 1); $hour++; } else { if ($minutes == 0) { printf("%02d:%02d", $hour, $minutes) } else { printf(" :%02d", $minutes) } $minutes += $hist_interval; if ($minutes >= 60) { $minutes = 0; $hour++; } } printf(" %6d %s\n", $c, "." x ($c/$scale)); } print "\n"; } # Print league table sub print_league_table { my($text) = $_[0]; my($m_count) = $_[1]; my($m_data) = $_[2]; my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s"; my($temp) = "Top $name by message count"; printf ("%s\n%s\n\n", $temp, "-" x length($temp)); $count = 1; foreach $key (sort { $$m_count{$b} <=> $$m_count{$a} || $$m_data{$b} <=> $$m_data{$a} || $a cmp $b } keys %{$m_count}) { printf("%7d %10d %s\n", $$m_count{$key}, $$m_data{$key}, $key); last if $count++ >= $topcount; } $temp = "Top $name by volume"; printf ("\n%s\n%s\n\n", $temp, "-" x length($temp)); $count = 1; foreach $key (sort { $$m_data{$b} <=> $$m_data{$a} || $$m_count{$b} <=> $$m_count{$a} || $a cmp $b } keys %{$m_count}) { printf("%7d %10d %s\n", $$m_count{$key}, $$m_data{$key}, $key); last if $count++ >= $topcount; } print "\n"; } ################################################## # Main Program # ################################################## $delayed_count = 0; $relayed_unshown = 0; $show_errors = 1; $show_relay = 1; $show_transport = 1; $queue_more_than = 0; $queue_unknown = 0; $topcount = 50; $local_league_table = 1; $hist_opt = 1; $begin = "9999-99-99 99:99:99"; $end = "0000-00-00 00:00:00"; $gig = 1024 * 1024 * 1024; # Decode options while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') { if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 } elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 } elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) { if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 } } elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) { @queue_times = split(/,/, $1); foreach $q (@queue_times) { $q = eval($q) + 0 } @queue_times = sort { $a <=> $b } @queue_times; @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0); } elsif ($ARGV[0] =~ /^\-nt$/) { $show_transport = 0 } elsif ($ARGV[0] =~ /^\-t(\d+)$/) { $topcount = $1 } elsif ($ARGV[0] =~ /^\-tnl$/) { $local_league_table = 0 } else { print "Eximstats: Unknown or malformed option $ARGV[0]\n" unless $ARGV[0] =~ /^\-help$/; print " Valid options are:\n"; print " -h histogram divisions per hour\n"; print " default is 1, 0 suppresses histograms\n"; print " -ne don't display error information\n"; print " -nr don't display relaying information\n"; print " -nr/pattern/ don't display relaying information that matches\n"; print " -nt don't display transport information\n"; print " -q list of times for queuing information\n"; print " single 0 item suppresses\n"; print " -t display top sources/destinations\n"; print " default is 50, 0 suppresses top listing\n"; print " -tnl omit local sources/destinations in top listing\n"; exit 1; } shift; } # Initialize slots for queue times for ($i = 0; $i <= $#queue_times; $i++) { $queue_bin[$i] = 0; $remote_queue_bin[$i] = 0; } # Compute the number of slots for the histogram if ($hist_opt > 0) { if ($hist_opt > 60 || 60 % $hist_opt != 0) { print "Eximstats: -h must specify a factor of 60\n"; exit 1; } $hist_interval = 60/$hist_opt; $hist_number = (24*60)/$hist_interval; @received_interval_count = (0) x $hist_number; @delivered_interval_count = (0) x $hist_number; } # Scan the input files and collect the data $received_data_total = 0; $received_data_gigs = 0; $received_count_total = 0; $delivered_data_total = 0; $delivered_data_gigs = 0; $delivered_count_total = 0; while (<>) { next if ! /^[\d]{4}\-[\d]{2}\-[\d]{2}\s[\d]{2}:[\d]{2}:[\d]{2}/; $tod = substr($_, 0, 19); $id = substr($_, 20, 16); $begin = $tod if $tod lt $begin; $end = $tod if $tod gt $end; next if length($_) < 38; $flag = substr($_, 37, 2); if ($flag eq "<=") { ($thissize) = /\sS=([0-9]+)( |$)/; $thissize = 0 if !defined($thissize); $size{$id} = $thissize; ($host) = /\sH=(\S+)/; if (defined $host) { if ($show_relay) # Save incoming information { # in case it becomes interesting ($from) = /^.{40}(\S+)/; # later, when delivery lines are read ($ip) = /\sH=\S+(?:(?=\s\()\s\S+)?(\s\[[^]]*\])/; $ip = "" if !defined $ip; $from_host{$id} = "$host$ip"; $from_address{$id} = $from; } } else { $host = "local"; ($user) = /\sU=(\S+)/; if (defined $user) { $received_count_user{$user}++; $received_data_user{$user} = 0 if !defined $received_data_user{$user}; $received_data_user{$user} += $thissize; } } $received_count{$host}++; $received_data{$host} = 0 if !defined $received_data{$host}; $received_data{$host} += $thissize; $received_count_total++; $received_data_total += $thissize; if ($received_data_total > $gig) { $received_data_gigs++; $received_data_total -= $gig; } $arrival_time{$id} = $tod if $#queue_times >= 0; if ($hist_opt > 0) { ($hour,$minute) = $tod =~ /^\S+ ([0-9]{2}):([0-9]{2}):/; $received_interval_count[($minute+$hour*60)/$hist_interval]++; } } elsif ($flag eq "=>") { $size = $size{$id}; $size = 0 if !defined $size; ($host) = /\sH=(\S+)/; if (defined $host) { $remote_delivered{$id} = 0 if !defined($remote_delivered{$id}); $remote_delivered{$id}++; # Determine relaying address if either only one address listed, # or two the same. If they are different, it implies a forwarding # or aliasing, which is not relaying. Note that for multi-aliased # addresses, there may be a further address between the first # and last. if ($show_relay && defined $from_host{$id}) { ($ip) = /\sH=\S+(?:(?=\s\()\s\S+)?(\s\[[^]]*\])/; $ip = "" if !defined $ip; if (/^.{40}(\S+)(?:\s+\([^)]\))?\s+<([^>]+)>/) { $old = $1; $new = $2 } else { $old = $new = ""; } if ("\L$new" eq "\L$old") { ($old) = /^.{40}(\S+)/ if $old eq ""; $key = "H=\L$from_host{$id}\E A=\L$from_address{$id}\E => " . "H=\L$host\E$ip A=\L$old\E"; if (!defined $relay_pattern || $key !~ /$relay_pattern/o) { $relayed{$key} = 0 if !defined $relayed{$key}; $relayed{$key}++; } else { $relayed_unshown++ } } } } else { $host = "local"; my($rest) = substr($_,40); if (($user) = split(($rest =~ /]*>)/; $user = "$user $parent" if defined $parent; } $delivered_count_user{$user}++; $delivered_data_user{$user} = 0 if !defined $delivered_data_user{$user}; $delivered_data_user{$user} += $size; } } $delivered_count{$host}++; $delivered_data{$host} = 0 if !defined $delivered_data{$host}; $delivered_data{$host} += $size; $delivered_count_total++; $delivered_data_total += $size; if ($delivered_data_total > $gig) { $delivered_data_gigs++; $delivered_data_total -= $gig; } if ($show_transport) { ($transport) = /\sT=(\S+)/; $transport = ":blackhole:" if !defined $transport; $transported_data{$transport} = 0 if !defined $transported_data{$transport}; $transported_data_gigs{$transport} = 0 if !defined $transported_data_gigs{$transport}; $transported_count{$transport}++; $transported_data{$transport} += $size; if ($transported_data{$transport} > $gig) { $transported_data_gigs{$transport}++; $transported_data{$transport} -= $gig; } } if ($hist_opt > 0) { ($hour,$minute) = $tod =~ /^\S+ ([0-9]{2}):([0-9]{2}):/; $delivered_interval_count[($minute+$hour*60)/$hist_interval]++; } } elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) { $delayed_count++; $delayed{$id} = 1; } elsif ($flag eq "**") { $had_error{$id} = 1 if defined ($size{$id}); if ($show_errors) { ($error) = substr($_, 40); $errors_count{$error}++; } } elsif (/Completed$/) { if ($#queue_times >=0) { $arrival = $arrival_time{$id}; delete($arrival_time{$id}); $queued = &seconds($tod) - (defined($arrival)? &seconds($arrival) : &id_seconds($id)); for ($i = 0; $i <= $#queue_times; $i++) { if ($queued < $queue_times[$i]) { $queue_bin[$i]++; $remote_queue_bin[$i]++ if $remote_delivered{$id}; last; } } $queue_more_than++ if $i > $#queue_times; } if ($show_relay) { delete($from_host{$id}); delete($from_address{$id}); } delete($size{$id}); } } if ($begin eq "9999-99-99 99:99:99") { print "**** No valid log lines read\n"; exit 1; } print "\nExim statistics from $begin to $end\n"; # Print grand totals print "\nGrand total summary"; print "\n-------------------"; print "\n At least one address"; print "\n TOTAL Volume Messages Hosts Delayed Failed"; print "\n Received "; &print_volume_rounded($received_data_total, $received_data_gigs); { no integer; $failed_count = keys %had_error; printf(" %6d %4d %6d %4.1f%% %6d %4.1f%%", $received_count_total, scalar(keys %received_data), $delayed_count, ($received_count_total == 0)? 0 : ($delayed_count*100)/$received_count_total, $failed_count, ($received_count_total == 0)? 0 : ($failed_count*100)/$received_count_total); } print "\n Delivered "; &print_volume_rounded($delivered_data_total, $delivered_data_gigs); printf(" %6d %4d\n", $delivered_count_total, scalar(keys %delivered_data)); print "\n"; # Print totals by transport if required if ($show_transport) { print "Deliveries by transport\n"; print "-----------------------"; print "\n Volume Messages"; foreach $key (sort keys %transported_data) { printf( "\n %-16s ", $key); &print_volume_rounded($transported_data{$key}, $transported_data_gigs{$key}); printf( " %6d", $transported_count{$key}); } print "\n\n"; } # Print the deliveries per interval as a histogram, unless configured not to. # First find the maximum in one interval and scale accordingly. if ($hist_opt > 0) { &print_histogram("Messages received", @received_interval_count); &print_histogram("Deliveries", @delivered_interval_count); } # Print times on queue if required if ($#queue_times >= 0) { &print_queue_times("all messages", \@queue_bin); &print_queue_times("messages with at least one remote delivery", \@remote_queue_bin); } # Print relay information if required if ($show_relay) { if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) { $shown = 0; $spacing = ""; print "Relayed messages\n"; print "----------------\n\n"; foreach $key (sort keys %relayed) { $count = $relayed{$key}; $shown += $count; $key =~ s/[HA]=//g; ($one,$two) = split(/=> /, $key); printf("%7d %s\n => %s\n", $count, $one, $two); $spacing = "\n"; } print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n"; } else { print "No relayed messages\n"; print "-------------------\n"; } print "\n"; } # If the topcount is zero, print no league tables if ($topcount > 0) { &print_league_table("sending host", \%received_count, \%received_data); &print_league_table("local sender", \%received_count_user, \%received_data_user) if $local_league_table; &print_league_table("destination", \%delivered_count, \%delivered_data); &print_league_table("local destination", \%delivered_count_user, \%delivered_data_user) if $local_league_table; } # Omit error statistics if configured out if ($show_errors) { $total_errors = 0; if (scalar(keys %errors_count) != 0) { print "List of errors\n"; print "--------------\n\n"; foreach $key (sort keys %errors_count) { chop($text = $key); printf("%5d ", $errors_count{$key}); $total_errors += $errors_count{$key}; while (length($text) > 65) { ($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/; last if !$first; printf("%s\n ", $first); $text = $rest; } printf("%s\n\n", $text); } } $temp = "Errors encountered: $total_errors"; printf("%s\n%s\n\n", $temp, "-" x length($temp)); } # End of eximstats