#!/usr/bin/perl -w

# Copyright (c) 2001 University of Cambridge.
# See the file NOTICE for conditions of use and distribution.

# Perl script to generate statistics from one or more Exim log files.

# Usage: eximstats [options] mainlog1 mainlog2 ...

# 1996-05-21: Ignore lines not starting with valid date/time, just in case
#             these get into a log file.
# 1996-11-19: Add the -h option to control the size of the histogram,
#             and optionally turn it off.
#             Use some Perl 5 things; it should be everywhere by now.
#             Add the Perl -w option and rewrite so no warnings are given.
#             Add the -t option to control the length of the "top" listing.
#             Add the -ne, -nt options to turn off errors and transport
#             information.
#             Add information about length of time on queue, and -q to
#             control the intervals and turn it off.
#             Add count and percentage of delayed messages to the Received
#             line.
#             Show total number of errors.
#             Add count and percentage of messages with errors to Received
#             line.
#             Add information about relaying and -nr to suppress it.
# 1997-02-03  Merged in some of the things Nigel Metheringham had done:
#             Re-worded headings
#             Added received histogram as well as delivered
#             Added local senders' league table
#             Added local recipients' league table
# 1997-03-10  Fixed typo "destinationss"
#             Allow for intermediate address between final and original
#             when testing for relaying
#             Give better message when no input
# 1997-04-24  Fixed bug in layout of error listing that was depending on
#             text length (output line got repeated).
# 1997-05-06  Bug in option decoding when only one option.
#             Overflow bug when handling very large volumes.
# 1997-10-28  Updated to handle revised log format that might show
#             HELO name as well as host name before IP number
# 1998-01-26  Bugs in the function for calculating the number of seconds
#             since 1970 from a log date
# 1998-02-02  Delivery to :blackhole: doesn't have a T= entry in the log
#             line; cope with this, thereby avoiding undefined problems
#             Very short log line gave substring error
# 1998-02-03  A routed delivery to a local transport may not have <> in the
#             log line; terminate the address at white space, not <
# 1998-09-07  If first line of input was a => line, $thissize was undefined;
#             ensure it is zero.
# 1998-12-21  Adding of $thissize from => line should have been adding $size.
#             Oops. Should have looked more closely when fixing the previous
#             bug!
# 1999-11-12  Increased the field widths for printed integers; numbers are
#             bigger than originally envisaged.
# 2001-03-21  Converted seconds() routine to use Time::Local, fixing a bug
#             whereby seconds($timestamp) - id_seconds($id) gave an
#             incorrect result.
#             Added POD documentation.
#             Moved usage instructions into help() subroutine.
#             Added 'use strict' and declared all global variables.
#             Added '-html' flag and resultant code.
#             Added '-cache' flag and resultant code.
#             Added add_volume() routine and converted all volume variables
#             to use it, fixing the overflow problems for individual hosts
#             on large sites.
#             Converted all volume output to GB/MB/KB as appropriate.
#             Don't store local user stats if -nfl is specified.
#             Modifications done by: Steve Campbell ()
# 2001-04-02  Added the -t_remote_users flag. Steve Campbell.
# 2001-10-15  Added the -domain flag. Steve Campbell.
# 2001-10-16  Accept files on STDIN or on the command line. Steve Campbell.
# 2001-10-21  Removed -domain flag and added -bydomain, -byhost, and -byemail.
#             We now generate our main parsing subroutine as an eval
#             statement, which improves performance dramatically when not
#             all the results are required. We also cache the last
#             timestamp to time conversion.
#
#             NOTE: 'Top 50 destinations by (message count|volume)' lines
#             are now 'Top N (host|email|domain) destinations by
#             (message count|volume)' where N is the topcount.
#             Steve Campbell.
# 2001-10-30  Fixed minor bugs in add_volume() when taking over this version
#             for use in Exim 4: -w gave uninitialized value warnings in
#             two situations: for the first addition to a counter, and if
#             there were never any gigabytes, thereby leaving the $gigs
#             value unset.
#             Initialized $last_timestamp to stop a -w uninitialized warning.
#             Minor layout tweak for grand totals (nitpicking).
#             Put the IP addresses for relaying stats in [] and separated by
#             a space from the domain name.
#             Removed the IPv4-specific address test when picking out
#             addresses for relaying. Anything inside [] is OK.

=head1 NAME

eximstats - generates statistics from Exim mainlog files.

=head1 SYNOPSIS

 eximstats [Options] mainlog1 mainlog2 ... > report.txt

Options:

=over 4

=item B<-h>I<number>

Histogram divisions per hour. The default is 1, and 0 suppresses
histograms. Valid values are: 0, 1, 2, 3, 5, 10, 15, 20, 30 or 60.

=item B<-ne>

Don't display error information.

=item B<-nr>

Don't display relaying information.

=item B<-nr>I</pattern/>

Don't display relaying information that matches the pattern.

=item B<-nt>

Don't display transport information.

=item B<-q>I<times>

List of times for queuing information; a single 0 item suppresses it.

=item B<-t>I<number>

Display top I<number> sources/destinations. The default is 50;
0 suppresses the top listing.

=item B<-tnl>

Omit local sources/destinations in top listing.

=item B<-t_remote_users>

Include remote users in the top source/destination listings.

=item B<-byhost>

Show results by sending host. This may be combined with B<-bydomain> and/or
B<-byemail>. If none of these options are specified, then B<-byhost> is
assumed as a default.

=item B<-bydomain>

Show results by sending domain. May be combined with B<-byhost> and/or
B<-byemail>.

=item B<-byemail>

Show results by sender's email address. May be combined with B<-byhost>
and/or B<-bydomain>.

=item B<-html>

Output the results in HTML.

=item B<-cache>

Cache results of timegm() lookups. This will result in a significant
speedup when processing hundreds of thousands of messages, at a cost of
increasing the memory utilisation.

=back

=head1 DESCRIPTION

Eximstats parses exim mainlog files and outputs a statistical analysis of
the messages processed. By default, a text analysis is generated, but you
can request an HTML analysis by using the B<-html> flag.

=head1 AUTHOR

There is a web site at http://www.exim.org - this contains details of the
mailing list exim-users@exim.org.

=head1 TO DO

This program does not perfectly handle messages whose received and
delivered log lines are in different files, which can happen when you have
multiple mail servers and a message cannot be immediately delivered.
Fixing this could be tricky...

=head1 SUBROUTINES

The following section will only be of interest to the program maintainers:

=cut

use integer;
use strict;
use Time::Local;
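
# For illustration, a typical invocation (the log file names here are only
# examples): produce an HTML report of the top 20 sources and destinations,
# broken down by both sending host and sending domain, from two rotated logs:
#
#   eximstats -html -t20 -byhost -bydomain mainlog.1 mainlog.2 > report.html
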
##################################################
#                  Static data                   #
##################################################

# Will convert from 'use vars' to 'our' when perl 5.6.0 is out for
# Solaris 2.6 on sunfreeware.com.

use vars qw(@tab62 @days_per_month $gig);
use vars qw($VERSION);

@tab62 = (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0,     # 0-9
           0,10,11,12,13,14,15,16,17,18,19,20, # A-K
          21,22,23,24,25,26,27,28,29,30,31,32, # L-W
          33,34,35, 0, 0, 0, 0, 0,             # X-Z
           0,36,37,38,39,40,41,42,43,44,45,46, # a-k
          47,48,49,50,51,52,53,54,55,56,57,58, # l-w
          59,60,61);                           # x-z

@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
$gig = 1024 * 1024 * 1024;
$VERSION = '1.16';

# Declare global variables.
use vars qw($total_received_data $total_received_data_gigs $total_received_count);
use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
use vars qw(%arrival_time %size %from_host %from_address);
use vars qw(%timestamp2time);             #Hash of timestamp => time.
use vars qw($last_timestamp $last_time);  #The last time conversion done.
use vars qw($i);                          #General loop counter.
use vars qw($debug);                      #Debug mode?

# The following are parameters whose values are
# set by command line switches:
use vars qw($show_errors $show_relay $show_transport);
use vars qw($topcount $local_league_table $include_remote_users);
use vars qw($hist_opt $hist_interval $hist_number);
use vars qw($relay_pattern @queue_times $html);
use vars qw($cache_id_times);
use vars qw(%do_sender);    #Do sender by Host, Domain, and/or Email tables.

# The following are modified in the parse() routine, and
# referred to in the print_*() routines.
use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
use vars qw(%received_count %received_data %received_data_gigs);
use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
use vars qw(%transported_count %transported_data %transported_data_gigs);
use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);

##################################################
#                   Subroutines                  #
##################################################

=head2 volume_rounded();

 $rounded_volume = volume_rounded($bytes,$gigabytes);

Given a data size in bytes, round it to KB, MB, or GB as appropriate.
Eg 12000 => 12KB, 15000000 => 14MB, etc.

Note: I've experimented with Math::BigInt and it results in a 33%
performance degradation as opposed to storing numbers split into bytes
and gigabytes.

=cut

sub volume_rounded {
my($x,$g) = @_;
my($rounded);
while ($x > $gig) {
  $g++;
  $x -= $gig;
}

# Values < 1 GB
if ($g <= 0) {
  if ($x < 10000) {
    $rounded = sprintf("%6d", $x);
  }
  elsif ($x < 10000000) {
    $rounded = sprintf("%4dKB", ($x + 512)/1024);
  }
  else {
    $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
  }
}
# Values between 1GB and 10GB are printed in MB
elsif ($g < 10) {
  $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
}
# Handle values over 10GB
else {
  $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
}

return $rounded;
}

=head2 add_volume();

 add_volume(\$bytes,\$gigs,$size);

Add $size to $bytes/$gigs where this is a number split into bytes
($bytes) and gigabytes ($gigs). This is significantly faster than
using Math::BigInt.

=cut

sub add_volume {
my($bytes_ref,$gigs_ref,$size) = @_;
$$bytes_ref = 0 if ! defined $$bytes_ref;
$$gigs_ref = 0 if ! defined $$gigs_ref;
$$bytes_ref += $size;
while ($$bytes_ref > $gig) {
  $$gigs_ref++;
  $$bytes_ref -= $gig;
}
}

=head2 format_time();

 $formatted_time = format_time($seconds);

Given a time in seconds, break it down into weeks, days, hours,
minutes, and seconds. Eg 12005 => 3h20m5s

=cut

sub format_time {
my($t) = pop @_;
my($s) = $t % 60; $t /= 60;
my($m) = $t % 60; $t /= 60;
my($h) = $t % 24; $t /= 24;
my($d) = $t % 7;
my($w) = $t/7;
my($p) = "";
$p .= "$w"."w" if $w > 0;
$p .= "$d"."d" if $d > 0;
$p .= "$h"."h" if $h > 0;
$p .= "$m"."m" if $m > 0;
$p .= "$s"."s" if $s > 0 || $p eq "";
$p;
}

=head2 seconds();

 $time = seconds($timestamp);

Given a time-of-day timestamp, convert it into a time() value using
timegm. We expect the timestamp to be of the form
"$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12, and the
year to be absolute (we do the necessary conversions). We use timegm
rather than timelocal as the id_seconds appears to be calculated in GM
time. Should we ever switch to local time for any reason, we should
calculate the localtime offset once at the beginning of the program, and
then use timegm from then on, as timelocal() is horribly inefficient.

We store the result of the last conversion done, so as to speed things up
when consecutive mainlog lines have the same timestamp.

If the -cache flag is specified, then we cache the results of the gmtime
lookup. This results in a performance boost when processing hundreds of
thousands of messages per day, at the cost of maintaining a memory cache.

=cut

sub seconds {
my($timestamp) = @_;

# Is the timestamp the same as the last one?
return $last_time if ($last_timestamp eq $timestamp);

# No. Have we got the timestamp cached?
if ($cache_id_times && $timestamp2time{$timestamp}) {
  return($timestamp2time{$timestamp});
}

return 0 unless ($timestamp =~ /^(\d{4})\-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/);
my(@timestamp) = ($1,$2,$3,$4,$5,$6);

# Adjust the values, as per gmtime(), and then reverse them
# to put them into the correct order.
$timestamp[0] -= 1900;
$timestamp[1]--;
my $time = timegm(reverse @timestamp);

if ($cache_id_times) {
  $timestamp2time{$timestamp} = $time;
}

# Store the last timestamp received.
$last_timestamp = $timestamp;
$last_time = $time;

$time;
}

=head2 id_seconds();

 $time = id_seconds($message_id);

Given a message ID, convert it into a time() value.

=cut

sub id_seconds {
my($sub_id) = substr((pop @_), 0, 6);
my($s) = 0;
my(@c) = split(//, $sub_id);
while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
$s;
}

=head2 calculate_localtime_offset();

 $localtime_offset = calculate_localtime_offset();

Calculate the localtime offset from gmtime in seconds.
$localtime = gmtime() + $localtime_offset.

This subroutine is commented out as it's not currently in use.

=cut

#sub calculate_localtime_offset {
#  # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
#  my (@sample_date) = (0,0,0,5,5,100);
#  my $localtime = timelocal(@sample_date);
#  my $gmtime = timegm(@sample_date);
#  my $offset = $localtime - $gmtime;
#  return $offset;
#}

=head2 print_queue_times();

 $time = print_queue_times($message_type,\@queue_times,$queue_more_than);

Given the type of messages being output, the array of message queue times,
and the number of messages which exceeded the queue times, print out
a table.
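
As an illustration, using the default set of queue times defined in the
main program,

 @queue_times = (60, 5*60, 15*60, 30*60, 60*60,
                 3*60*60, 6*60*60, 12*60*60, 24*60*60);

a message that spent 400 seconds on the queue is counted in the 15m bin
(delivered in under 15 minutes), while one that took two days exceeds
every bin, so it is counted in $queue_more_than and reported in the
"Over 1d" row.
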
=cut

sub print_queue_times {
no integer;
my($string,$array,$queue_more_than) = @_;

my $printed_one = 0;
my $cumulative_percent = 0;
#$queue_unknown += keys %arrival_time;

my $queue_total = $queue_more_than;
for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }

my $temp = "Time spent on the queue: $string";

my($format);
if ($html) {
  print "<hr><h2>$temp</h2>\n";
  print "<table border=1>\n";
  print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th></tr>\n";
  $format = "<tr><td>%s %s</td><td>%d</td><td>%5.1f%%</td><td>%5.1f%%</td></tr>\n";
}
else {
  printf("%s\n%s\n\n", $temp, "-" x length($temp));
  $format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
}

for ($i = 0; $i <= $#queue_times; $i++) {
  if ($$array[$i] > 0) {
    my $percent = ($$array[$i] * 100)/$queue_total;
    $cumulative_percent += $percent;
    printf($format, $printed_one? " " : "Under",
      format_time($queue_times[$i]), $$array[$i], $percent, $cumulative_percent);
    $printed_one = 1;
  }
}

if ($queue_more_than > 0) {
  my $percent = ($queue_more_than * 100)/$queue_total;
  $cumulative_percent += $percent;
  printf($format, "Over ", format_time($queue_times[$#queue_times]),
    $queue_more_than, $percent, $cumulative_percent);
}

#printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;

print "</table>\n" if $html;
print "\n";
}

=head2 print_histogram();

 print_histogram('Deliveries|Messages received',@interval_count);

Print a histogram of the messages delivered/received per time slot
(hour by default).

=cut

sub print_histogram {
my($text) = shift;
my(@interval_count) = @_;
my($maxd) = 0;
for ($i = 0; $i < $hist_number; $i++) {
  $maxd = $interval_count[$i] if $interval_count[$i] > $maxd;
}

my $scale = int(($maxd + 25)/50);
$scale = 1 if $scale == 0;

my($type);
if ($text eq "Deliveries") {
  $type = ($scale == 1)? "delivery" : "deliveries";
}
else {
  $type = ($scale == 1)? "message" : "messages";
}

my($temp) = sprintf("$text per %s (each dot is $scale $type)",
  ($hist_interval == 60)? "hour" :
  ($hist_interval == 1)? "minute" : "$hist_interval minutes");

if ($html) {
  print "<hr><h2>$temp</h2>\n<pre>\n";
}
else {
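  # Plain-text output: print the heading underlined with a row of dashes.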
  printf("%s\n%s\n\n", $temp, "-" x length($temp));
}

my $hour = 0;
my $minutes = 0;
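
# Print one row per interval: a time label, the message count, and a
# row of dots, one dot for every $scale messages.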
for ($i = 0; $i < $hist_number; $i++)
  {
  my $c = $interval_count[$i];

  # If the interval is an hour (the maximum) print the starting and
  # ending hours as a label. Otherwise print the starting hour and
  # minutes, which take up the same space.
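  # For example, with -h3 (20-minute intervals) the labels run
  # 00:00, :20, :40, 01:00, :20, :40, 02:00, and so on.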

  if ($hist_opt == 1)
    {
    printf("%02d-%02d", $hour, $hour + 1);
    $hour++;
    }
  else
    {
    if ($minutes == 0)
      { printf("%02d:%02d", $hour, $minutes) }
    else
      { printf("  :%02d", $minutes) }
    $minutes += $hist_interval;
    if ($minutes >= 60)
      {
      $minutes = 0;
      $hour++;
      }
    }

  printf(" %6d %s\n", $c, "." x ($c/$scale));
  }
print "\n";
print "
\n" if $html; } =head2 print_league_table(); print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs); Given hashes of message count and message data, which are keyed by the table type (eg by the sending host), print a league table showing the top $topcount (defaults to 50). =cut sub print_league_table { my($text,$m_count,$m_data,$m_data_gigs) = @_; my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s"; my($temp) = "Top $name by message count"; my($format); if ($html) { print "

$temp

\n"; print "\n"; print "\n"; # Align non-local addresses to the right (so all the .com's line up). # Local addresses are aligned on the left as they are userids. my $align = ($text !~ /local/i) ? 'right' : 'left'; $format = "\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); $format = "%7d %10s %s\n"; } my $count = 1; my($key); foreach $key (sort { $$m_count{$b} <=> $$m_count{$a} || $$m_data_gigs{$b} <=> $$m_data_gigs{$a} || $$m_data{$b} <=> $$m_data{$a} || $a cmp $b } keys %{$m_count}) { printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key); last if $count++ >= $topcount; } print "
MessagesBytes\u$text
%d%s%s
\n" if $html; print "\n"; $temp = "Top $name by volume"; if ($html) { print "

$temp

\n"; print "\n"; print "\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); } $count = 1; foreach $key (sort { $$m_data_gigs{$b} <=> $$m_data_gigs{$a} || $$m_data{$b} <=> $$m_data{$a} || $$m_count{$b} <=> $$m_count{$a} || $a cmp $b } keys %{$m_count}) { printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key); last if $count++ >= $topcount; } print "\n"; print "
MessagesBytes\u$text
\n" if $html; } =head2 html_header(); $header = html_header($title); Print our HTML header and start the block. =cut sub html_header { my($title) = @_; my $text = << "EoText"; $title

$title

EoText return $text; } =head2 help(); help(); Display usage instructions and exit. =cut sub help { print << "EoText"; eximstats Version $VERSION Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt Parses exim mainlog files and generates a statistical analysis of the messages processed. Valid options are: -h histogram divisions per hour. The default is 1, and 0 suppresses histograms. Other valid values are: 2, 3, 5, 10, 15, 20, 30 or 60. -ne don't display error information -nr don't display relaying information -nr/pattern/ don't display relaying information that matches -nt don't display transport information -q list of times for queuing information single 0 item suppresses -t display top sources/destinations default is 50, 0 suppresses top listing -tnl omit local sources/destinations in top listing -t_remote_users show top user sources/destinations from non-local domains -html output the results in HTML -cache increased processing speed, but higher memory utilisation. -byhost show results by sending host (default unless bydomain or byemail is specified) -bydomain show results by sending domain. -byemail show results by sender's email address. EoText exit 1; } =head2 generate_parser(); $parser = generate_parser(); This subroutine generates the parsing routine which will be used to parse the mainlog. We take the base operation, and remove bits not in use. This improves performance depending on what bits you take out or add. I've tested using study(), but this does not improve performance. We store our parsing routing in a variable, and process it looking for #IFDEF (Expression) or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If the expression evaluates to true, then it is included/excluded accordingly. =cut sub generate_parser { my $parser = ' my($ip,$host,$email,$domain,$thissize,$size,$old,$new); my($tod,$m_hour,$m_min,$id,$flag); while (<$fh>) { next if length($_) < 38; next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/; ($tod,$m_hour,$m_min) = ($1,$2,$3); $id = substr($_, 20, 16); $flag = substr($_, 37, 2); #Strip away the timestamp, ID and flag (which could be "Com" for completed) #This speeds up the later pattern matches. $_ = substr($_, 40); $host = "local"; #Host is local unless otherwise specified. # Do some pattern matches to get the host and IP address. # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]". # We do 2 separate matches to keep the matches simple and fast. if (/\\sH=(\\S+)/) { $host = $1; # This is Steve\'s statement. It is broken for IPv6 and it does not include # the square brackets in $ip. # ($ip) = /\\sH=.*?\\[([\\d\\.]+)\\]/; # This is PH\'s statement. ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/; #ifdef ($do_sender{Domain}) if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) { # Remove the host portion from the DNS name. We ensure that we end up with # at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z". $domain = $1.$2; } #ENDIF ($do_sender{Domain}) } #IFDEF ($do_sender{Email}) $email = (/^(\S+)/) ? $1 : ""; #ENDIF ($do_sender{Email}) if ($tod lt $begin) { $begin = $tod; } elsif ($tod gt $end) { $end = $tod; } if ($flag eq "<=") { $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0; $size{$id} = $thissize; #IFDEF ($show_relay) if ($host ne "local") { # Save incoming information in case it becomes interesting # later, when delivery lines are read. 
my($from) = /^(\\S+)/; $from_host{$id} = "$host$ip"; $from_address{$id} = $from; } #ENDIF ($show_relay) #IFDEF ($local_league_table || $include_remote_users) if (/\sU=(\\S+)/) { my $user = $1; #IFDEF ($local_league_table && $include_remote_users) { #Store both local and remote users. #ENDIF ($local_league_table && $include_remote_users) #IFDEF ($local_league_table && ! $include_remote_users) if ($host eq "local") { #Store local users only. #ENDIF ($local_league_table && ! $include_remote_users) #IFDEF ($include_remote_users && ! $local_league_table) if ($host ne "local") { #Store remote users only. #ENDIF ($include_remote_users && ! $local_league_table) $received_count_user{$user}++; add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize); } } #ENDIF ($local_league_table || $include_remote_users) #IFDEF ($do_sender{Host}) $received_count{Host}{$host}++; add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize); #ENDIF ($do_sender{Host}) #IFDEF ($do_sender{Domain}) $received_count{Domain}{$domain}++; add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize); #ENDIF ($do_sender{Domain}) #IFDEF ($do_sender{Email}) $received_count{Email}{$email}++; add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize); #ENDIF ($do_sender{Email}) $total_received_count++; add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize); #IFDEF ($#queue_times >= 0) $arrival_time{$id} = $tod; #ENDIF ($#queue_times >= 0) #IFDEF ($hist_opt > 0) $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++; #ENDIF ($hist_opt > 0) } elsif ($flag eq "=>") { $size = $size{$id} || 0; if ($host ne "local") { $remote_delivered{$id} = 0 if !defined($remote_delivered{$id}); $remote_delivered{$id}++; #IFDEF ($show_relay) # Determine relaying address if either only one address listed, # or two the same. If they are different, it implies a forwarding # or aliasing, which is not relaying. Note that for multi-aliased # addresses, there may be a further address between the first # and last. if (defined $from_host{$id}) { if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) { ($old,$new) = ($1,$2); } else { $old = $new = ""; } if ("\\L$new" eq "\\L$old") { ($old) = /^(\\S+)/ if $old eq ""; my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " . "H=\\L$host\\E$ip A=\\L$old\\E"; if (!defined $relay_pattern || $key !~ /$relay_pattern/o) { $relayed{$key} = 0 if !defined $relayed{$key}; $relayed{$key}++; } else { $relayed_unshown++ } } } #ENDIF ($show_relay) } #IFDEF ($local_league_table || $include_remote_users) #IFDEF ($local_league_table && $include_remote_users) { #Store both local and remote users. #ENDIF ($local_league_table && $include_remote_users) #IFDEF ($local_league_table && ! $include_remote_users) if ($host eq "local") { #Store local users only. #ENDIF ($local_league_table && ! $include_remote_users) #IFDEF ($include_remote_users && ! $local_league_table) if ($host ne "local") { #Store remote users only. #ENDIF ($include_remote_users && ! 
$local_league_table) if (my($user) = split((/]*>)/; $user = "$user $parent" if defined $parent; } $delivered_count_user{$user}++; add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size); } } #ENDIF ($local_league_table || $include_remote_users) #IFDEF ($do_sender{Host}) $delivered_count{Host}{$host}++; add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size); #ENDIF ($do_sender{Host}) #IFDEF ($do_sender{Domain}) $delivered_count{Domain}{$domain}++; add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size); #ENDIF ($do_sender{Domain}) #IFDEF ($do_sender{Email}) $delivered_count{Email}{$email}++; add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size); #ENDIF ($do_sender{Email}) $total_delivered_count++; add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size); #IFDEF ($show_transport) my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:"; $transported_count{$transport}++; add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size); #ENDIF ($show_transport) #IFDEF ($hist_opt > 0) $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++; #ENDIF ($hist_opt > 0) } elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) { $delayed_count++; $delayed{$id} = 1; } elsif ($flag eq "**") { $had_error{$id} = 1 if defined ($size{$id}); #IFDEF ($show_errors) $errors_count{$_}++; #ENDIF ($show_errors) } elsif ($flag eq "Co") { #Completed? #IFDEF ($#queue_times >= 0) #Note: id_seconds() benchmarks as 42% slower than seconds() and computing #the time accounts for a significant portion of the run time. my($queued); if (defined $arrival_time{$id}) { $queued = seconds($tod) - seconds($arrival_time{$id}); delete($arrival_time{$id}); } else { $queued = seconds($tod) - id_seconds($id); } for ($i = 0; $i <= $#queue_times; $i++) { if ($queued < $queue_times[$i]) { $queue_bin[$i]++; $remote_queue_bin[$i]++ if $remote_delivered{$id}; last; } } $queue_more_than++ if $i > $#queue_times; #ENDIF ($#queue_times >= 0) # Get rid of data no longer needed - saves memory #IFDEF ($show_relay) delete($from_host{$id}); delete($from_address{$id}); #ENDIF ($show_relay) delete($size{$id}); } }'; # We now do a 'C preprocessor style operation on our parser # to remove bits not in use. my(%defines_in_operation,$removing_lines,$processed_parser); foreach (split (/\n/,$parser)) { if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) || (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) { $defines_in_operation{$1} = 1; $removing_lines = 1; } $processed_parser .= $_."\n" unless $removing_lines; if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) { delete $defines_in_operation{$1}; unless (keys %defines_in_operation) { $removing_lines = 0; } } } print "# START OF PARSER:\n$processed_parser\n\n# END OF PARSER\n" if $debug; return $processed_parser; } =head2 parse(); parse($parser,\*FILEHANDLE); This subroutine accepts a parser and a filehandle from main and parses each line. We store the results into global variables. =cut sub parse { my($parser,$fh) = @_; eval $parser; die ($@) if $@; } =head2 print_header(); print_header(); Print our headers and contents. =cut sub print_header { my $title = "Exim statistics from $begin to $end"; if ($html) { print html_header($title); print "\n
\n"; } else { print "\n$title\n"; } } =head2 print_grandtotals(); print_grandtotals(); Print the grand totals. =cut sub print_grandtotals { # Get the sender by headings and results. This is complicated as we can have # different numbers of columns. my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format); my(@received_totals,@delivered_totals); foreach ('Host','Domain','Email') { if ($do_sender{$_}) { my $total = keys %{$received_data{$_}}; push(@received_totals,scalar(keys %{$received_data{$_}})); push(@delivered_totals,scalar(keys %{$delivered_data{$_}})); $sender_html_header .= "${_}s"; $sender_txt_header .= " " x (7 - length($_)) . $_ . 's'; $sender_html_format .= "%d"; $sender_txt_format .= " %6d"; } } my($format1,$format2); if ($html) { print << "EoText";

Grand total summary

$sender_html_header EoText $format1 = "$sender_html_format"; $format2 = ""; } else { my $sender_spaces = " " x length($sender_txt_header); print << "EoText"; Grand total summary ------------------- $sender_spaces At least one address TOTAL Volume Messages $sender_txt_header Delayed Failed EoText $format1 = " %-16s %9s %6d $sender_txt_format"; $format2 = " %6d %4.1f%% %6d %4.1f%%", } my $volume = volume_rounded($total_received_data, $total_received_data_gigs); my $failed_count = keys %had_error; { no integer; printf("$format1$format2\n",'Received',$volume,$total_received_count, @received_totals,$delayed_count, ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0, $failed_count, ($total_received_count) ? ($failed_count*100/$total_received_count) : 0); } $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs); printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals); print "
TOTALVolumeMessagesAt least one addr
Delayed
At least one addr
Failed
%s%s%d%d%4.1f%%%d%4.1f%%
\n" if $html; } =head2 print_transport(); print_transport(); Print totals by transport. =cut sub print_transport { my($format1); if ($html) { print "

Deliveries by Transport

\n"; print "\n"; print "\n"; $format1 = ""; } else { print "Deliveries by transport\n"; print "-----------------------"; print "\n Volume Messages\n"; $format1 = " %-18s %6s %6d"; } my($key); foreach $key (sort keys %transported_data) { printf("$format1\n",$key, volume_rounded($transported_data{$key},$transported_data_gigs{$key}), $transported_count{$key}); } print "
 VolumeMessages
%s%s%d
\n" if $html; print "\n"; } =head2 print_relay(); print_relay(); Print our totals by relay. =cut sub print_relay { my $temp = "Relayed messages"; print "

$temp

\n" if $html; if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) { my $shown = 0; my $spacing = ""; my($format); if ($html) { print "\n"; print "\n"; $format = "\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); $format = "%7d %s\n => %s\n"; } my($key); foreach $key (sort keys %relayed) { my $count = $relayed{$key}; $shown += $count; $key =~ s/[HA]=//g; my($one,$two) = split(/=> /, $key); printf($format, $count, $one, $two); $spacing = "\n"; } print "
CountFromTo
%d%s%s
\n

\n" if $html; print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n"; } else { print "No relayed messages\n"; print "-------------------\n" unless $html; } print "\n"; } =head2 print_errors(); print_errors(); Print our errors. In HTML, we display them as a list rather than a table - Netscape doesn't like large tables! =cut sub print_errors { my $total_errors = 0; if (scalar(keys %errors_count) != 0) { my $temp = "List of errors"; my($format); if ($html) { print "


$temp

\n"; print "
  • Count - Error\n"; $format = "
  • %d - %s\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); } my($key); foreach $key (sort keys %errors_count) { my $text = $key; chop($text); $total_errors += $errors_count{$key}; if ($html) { printf($format,$errors_count{$key},$text); } else { printf("%5d ", $errors_count{$key}); while (length($text) > 65) { my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/; last if !$first; printf("%s\n ", $first); $text = $rest; } printf("%s\n\n", $text); } } print "
\n

\n" if $html; } my $temp = "Errors encountered: $total_errors"; print $temp,"\n"; print "-" x length($temp),"\n" unless $html; } ################################################## # Main Program # ################################################## $last_timestamp = 0; $show_errors = 1; $show_relay = 1; $show_transport = 1; $topcount = 50; $local_league_table = 1; $include_remote_users = 0; $hist_opt = 1; @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60, 12*60*60, 24*60*60); # Decode options while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') { if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 } elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 } elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) { if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 } } elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) { @queue_times = split(/,/, $1); my($q); foreach $q (@queue_times) { $q = eval($q) + 0 } @queue_times = sort { $a <=> $b } @queue_times; @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0); } elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 } elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 } elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 } elsif ($ARGV[0] =~ /^-html$/) { $html = 1 } elsif ($ARGV[0] =~ /^-cache$/) { $cache_id_times = 1 } elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 } elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 } elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 } elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 } elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() } elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 } else { print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n"; help(); } shift; } # Default to display tables by sending Host. $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email}); for (my $i = 0; $i <= $#queue_times; $i++) { $queue_bin[$i] = 0; $remote_queue_bin[$i] = 0; } # Compute the number of slots for the histogram if ($hist_opt > 0) { if ($hist_opt > 60 || 60 % $hist_opt != 0) { print "Eximstats: -h must specify a factor of 60\n"; exit 1; } $hist_interval = 60/$hist_opt; $hist_number = (24*60)/$hist_interval; @received_interval_count = (0) x $hist_number; @delivered_interval_count = (0) x $hist_number; } #$queue_unknown = 0; $total_received_data = 0; $total_received_data_gigs = 0; $total_received_count = 0; $total_delivered_data = 0; $total_delivered_data_gigs = 0; $total_delivered_count = 0; $queue_more_than = 0; $delayed_count = 0; $relayed_unshown = 0; $begin = "9999-99-99 99:99:99"; $end = "0000-00-00 00:00:00"; # Generate our parser. my $parser = generate_parser(); if (@ARGV) { # Scan the input files and collect the data foreach my $file (@ARGV) { if ($file =~ /\.gz/) { unless (open(FILE,"gunzip -c $file |")) { print STDERR "Failed to gunzip -c $file: $!"; next; } } elsif ($file =~ /\.Z/) { unless (open(FILE,"uncompress -c $file |")) { print STDERR "Failed to uncompress -c $file: $!"; next; } } else { unless (open(FILE,$file)) { print STDERR "Failed to read $file: $!"; next; } } #Now parse the filehandle, updating the global variables. parse($parser,\*FILE); } } else { #No files provided. Parse STDIN, updating the global variables. parse($parser,\*STDIN); } if ($begin eq "9999-99-99 99:99:99") { print "**** No valid log lines read\n"; exit 1; } # Output our results. print_header(); print_grandtotals(); # Print totals by transport if required. print_transport() if $show_transport; # Print the deliveries per interval as a histogram, unless configured not to. 
# First find the maximum in one interval and scale accordingly. if ($hist_opt > 0) { print_histogram("Messages received", @received_interval_count); print_histogram("Deliveries", @delivered_interval_count); } # Print times on queue if required. if ($#queue_times >= 0) { print_queue_times("all messages", \@queue_bin,$queue_more_than); print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than); } # Print relay information if required. print_relay() if $show_relay; # Print the league tables, if topcount isn't zero. if ($topcount > 0) { foreach ('Host','Domain','Email') { next unless $do_sender{$_}; print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_}); } print_league_table("local sender", \%received_count_user, \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users); foreach ('Host','Domain','Email') { next unless $do_sender{$_}; print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_}); } print_league_table("local destination", \%delivered_count_user, \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users); } # Print the error statistics if required. print_errors() if $show_errors; if ($html) { print "\n\n" } # End of eximstats