#!/usr/bin/perl -w # # Copyright (c) 2004-2005 # Chris Adams # written for HiWAAY Internet Services (http://www.hiwaay.net/) # ######################################################################## # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. ######################################################################## # # Parse the Arkeia log file and print or email a summary # # Use by putting in your server.prf (usually in /opt/arkeia/server): # # ARK_MAILCMD "/usr/local/sbin/arkeia-logparse.pl -S '%S' -e '%T' '%F'" # # Command line: arkeia-logparse.pl [options] # -d (optional): path to the Arkeia dbase directory # -s (optional): domain name to strip # -t (optional): print top count entries in each section # -e (optional): email report to this address # -S (optional): subject for emailed reports # -M (optional): path to sendmail # : log file to parse # If -d is not specified, the dbase directory is derived from the log # file path (assumed to be under /f3tmp/) # # History: # 1.0 - 2004-12-16 # rewrite old script for Arkeia 5.2 # # 1.1 - 2005-01-18 # handle interactive backups as well # handle flows that didn't end right # # 1.2 - 2005-02-17 # handle mailing directly # use POSIX qw(strftime); use Time::Local; use Sys::Hostname; use Getopt::Std; use strict; my $MIN = 10; my $TFMT = "%Y-%m-%d %H:%M:%S"; use vars qw($opt_d $opt_s $opt_t $opt_e $opt_S $opt_M); my $hostname = hostname; if ($hostname =~ /\.(.+)\.?$/) { $opt_s = $1; } $opt_t = 20; $opt_S = "Arkeia Backup Report"; $opt_M = "/usr/sbin/sendmail"; getopts ("d:s:t:e:S:M:"); my $DBDIR = $opt_d; my $DOM = $opt_s; my $TOP = $opt_t; my $EMAIL = $opt_e; my $SUBJECT = $opt_S; my $SENDMAIL = $opt_M; my $LOG = shift @ARGV || die "no log specified\n"; # Use the raw log file $LOG =~ s/\.txt$//; if (! $DBDIR) { ($DBDIR) = $LOG =~ /(.*)\/f3tmp\//; } # Read and parse the log file my %errors = my @flows = my %trees = my %info = my %tapes = my %drives = my %servers = (); open (LOG, $LOG) or die "open($LOG): $!\n"; while () { my $line = $_; chomp; next if (/^\s*$/); my ($prod, $date, $time, $minfo, $sid, $msg) = split (/\s+/, $_, 6); # Product should always be "A" next if ($prod ne "A"); # Get the entry timestamp my $stamp = dateparse ($date, $time); # Split the message info code my ($errtype, $msg_id, $err_id) = $minfo =~ /^(\S)(\d{3})(\d{5})$/; next if (! defined ($err_id)); # Split off the flow ID if appropriate my $flow = ""; if (($msg_id == 40) || ($msg_id == 25)) { ($flow, my $xmsg) = $msg =~ /^\[(\d+)\] (.*)$/; next if (! defined ($flow)); $msg = $xmsg; } # Save error entries my $iserr = 0; if ($errtype eq "E") { # Ignore attempts to back up transient files next if (($msg_id == 25) && (($err_id == 13) || ($err_id == 32) || ($err_id == 93))); # Ignore attempts to read special Windows files next if (($msg_id == 25) && ($err_id == 99)); $iserr = 1; } elsif ($errtype eq "W") { $iserr = 1 if (($msg_id == 29) && ($err_id == 55)); } if ($iserr) { my %e; $e{"line"} = $line; $e{"flow"} = $flow; $e{"msg"} = $msg; push @{$errors{$stamp}}, \%e; next; } # Gather stats from end-of-flow messages if (($msg_id == 40) && (($err_id == 26) || ($err_id == 27) || ($err_id == 28))) { my ($tree, $files, $meg, $sec, $rate, $errors, $code) = (split (/"/, $msg))[1,3,5,7,9,11,13]; next if (! $tree || ! defined ($errors)); $code ||= 0; # Strip domain if ((my $pos = index ($tree, "." . $DOM)) > 0) { substr ($tree, $pos, length ($DOM) + 1, ""); } my ($server) = $tree =~ /^([^:!]+)[:!]/; next if (! $server); # Build a flow record my %e; $e{"tree"} = $tree; $e{"files"} = $files; $e{"size"} = $meg; $e{"time"} = $sec; $e{"end"} = $stamp; $e{"start"} = $stamp - $sec; $e{"err_id"} = $err_id; $e{"code"} = $code; $e{"flow"} = $flow; $e{"server"} = $server; # Calculate the rate (but only if the flow lasted more than # $MIN seconds) if ($sec > $MIN) { $e{"speed"} = $meg / ($sec / 60); } else { $e{"speed"} = 0; } # Save the record, both by tree and by flow unshift @{$trees{$tree}}, \%e; push @{$flows[$flow]}, \%e; # Also keep track of some per-server stats foreach my $f (qw(files size time)) { $servers{$server}{$f} += $e{$f} || 0; } } # Some other interesting messages if (($msg_id == 28) && ($err_id == 18)) { $info{"aborted"} = 1; } if ($msg_id == 29) { if (($err_id == 5) || ($err_id == 6)) { my ($save, $drive, $pool) = (split (/"/, $msg))[1,3,5]; $info{"savepack"} = $save; $info{"drivepack"} = $drive; $info{"pool"} = $pool; $info{"btype"} = qw(total incremental)[$err_id - 5]; my ($start) = $sid =~ /^B(\d+)$/; $info{"start"} ||= $start; } if ($err_id == 8) { my ($owner) = (split (/"/, $msg))[1]; $info{"owner"} = $owner; } if ($err_id == 38) { $info{"notapes"} = 1; } if ($err_id == 44) { my ($save) = (split (/"/, $msg))[1]; push @{$info{"savepacks"}}, $save; } if ($err_id == 52) { my ($files, $meg, $compress, $seconds, $rate) = (split (/"/, $msg))[1,3,5,7,9]; $info{"files"} = $files; $info{"size"} = $meg; $info{"compress"} = $compress; $info{"seconds"} = $seconds; $info{"end"} = $stamp; } } if ($msg_id == 39) { if ($err_id == 1) { my ($drive) = (split (/'/, $msg))[1]; my $tape = $drives{$drive}; next if (! $tape); $tapes{$tape}{"end"} = $stamp; $drives{$drive} = ""; } if ($err_id == 4) { my ($tape, $drive) = (split (/'/, $msg))[1,3]; $drives{$drive} = $tape; $tapes{$tape}{"start"} = $stamp; } } if ($msg_id == 42) { if ($err_id == 0) { my ($per, $level) = (split (/"/, $msg))[1,3]; my ($start) = $sid =~ /^O(\d+)$/; $info{"name"} = $per; $info{"level"} = $level; $info{"start"} = $start; } elsif (($err_id == 8) || ($err_id == 9) || ($err_id == 10) || ($err_id == 11)) { my $type = (qw(incremental differential archive total))[$err_id - 8]; my ($base, $blevel) = (split (/"/, $msg))[1,3]; $base ||= ""; $blevel ||= ""; $info{"type"} = $type; $info{"base"} = $base; $info{"blevel"} = $blevel; } } } close (LOG); # Some things don't appear in interactive backups if (! $info{"name"}) { $info{"name"} = "Interactive"; $info{"type"} = $info{"btype"}; } # Overall info my $start = $info{"start"}; my $end = $info{"end"}; my $ran = $end - $start; # Calculate flow information my $maxflows = $#flows; my @flowend = (); foreach my $flow (@flows[1 .. $maxflows]) { my $last = $$flow[$#$flow]; push @flowend, $$last{"end"} || $end; } my $avgflows = 0; my $fran = (sort { $b <=> $a } @flowend)[0] - $start; foreach my $fend (@flowend) { $avgflows += ($fend - $start) / $fran; } my $fp = 1; if ($fran > 86400) { $fp = 4; } elsif ($fran > 3600) { $fp = 3; } elsif ($fran > 60) { $fp = 2; } # Gather some tape info my %tinfo = my %tstats = (); foreach my $tape (keys %tapes) { my $stats = tapestats ($tape); $tstats{$tape} = $$stats{"top"}; $tinfo{"status"}{$tstats{$tape}{"ST_CONTENT"}} ++; $tinfo{"used"} += $tstats{$tape}{"USED_SPACE"}; } # If the report goes to email, report STDOUT to sendmail my $sendpid; if ($EMAIL) { die "fork: $!\n" if (! defined ($sendpid = open (SEND, "|-"))); if (! $sendpid) { exec $SENDMAIL, "-oi", $EMAIL; die "exec($SENDMAIL -oi $EMAIL): $!\n"; } my $subject = $SUBJECT; if (%errors) { $subject .= " (" . scalar (keys %errors) . " errors)"; } open (STDOUT, ">&SEND") or die "dup(SEND): $!\n"; my $who = getpwuid ($<) . "\@" . hostname; print < To: $EMAIL Subject: $subject EOF } # Generate report my $msg = sprintf ("Report for %s backup %s%s, Savepack: %s", $info{"type"}, $info{"name"}, defined ($info{"level"}) ? sprintf (" (level %d)", $info{"level"}) : "", $info{"savepack"}); under ($msg); # Summary print "Started: ", strftime ($TFMT, localtime ($start)), "\n"; print "Ended: ", strftime ($TFMT, localtime ($end)), "\n"; my $fmtran = fmttime ($ran); $fmtran =~ s/^[0\s]+//; print "Ran for: ", $fmtran, "\n"; wprint (80, 4, "Savepacks: ", join (", ", @{$info{"savepacks"}})); printf "Backed up: %s files (%.2f/min), %s MB (%.2f/min)\n", comma ($info{"files"}), $info{"files"} / ($ran / 60), comma ($info{"size"}), $info{"size"} / ($ran / 60); print "Compressed: ", $info{"compress"}, "\n"; print "\n"; # If there were errors, display all error messages here if (%errors) { under ("Errors"); foreach my $e (sort keys %errors) { foreach my $r (@{$errors{$e}}) { print $$r{"line"}; } } } else { under ("No errors occurred during backup"); } print "\n"; # Print tape info under ("Tape information"); print "Tape pool: ", $info{"pool"}, "\n"; wprint (80, 4, "Tapes used (", scalar (keys %tapes), "): ", join (" ", sort keys %tapes)); print "Tapes filled: ", $tinfo{"status"}{"TPSTC_FULL"} || 0, "\n"; print "Tapes partially filled: ", $tinfo{"status"}{"TPSTC_INUSE"} || 0, "\n"; print "Space used on tapes: ", comma ($tinfo{"used"}), "MB\n"; my %TSTATUS = ( "TPSTC_FULL" => "Full", "TPSTC_INUSE" => "In use", ); foreach my $tape (sort keys %tapes) { print $tape, ": ", $TSTATUS{$tstats{$tape}{"ST_CONTENT"}}, ", ", comma ($tstats{$tape}{"USED_SPACE"}), "MB in use\n"; } print "\n"; # Flow distribution data under ("Parallel flow information"); print "Total flows: ", $maxflows, "\n"; printf "Average flows: %.2f\n", $avgflows; print "\n"; under ("Flow distribution (at least 1 minute)"); under ("Flows Run time"); my $fcnt = $maxflows; my $fstart = $start; foreach my $fend (sort { $a <=> $b } @flowend) { my $ftime = $fend - $fstart; my $t = fmttime ($ftime, $fp); $t =~ s/^0:/ :/; 1 while ($t =~ s/^(\s*)00:/$1 /); $t =~ s/^(\s+) /$1:/ if ($t !~ /:/); $t =~ s/^(\s*)0/$1 /; printf ("%5d %s\n", $fcnt--, $t) if ($ftime > 60); $fstart = $fend; } print "\n"; # Calculate per-server speed foreach my $s (keys %servers) { my $size = $servers{$s}{"size"} || 0; my $time = $servers{$s}{"time"} || 1; $servers{$s}{"speed"} = $size / ($time / 60); } my $cnt; # Display per-server stats under ("$TOP largest servers"); under (" MB MB/min Server "); $cnt = $TOP; foreach my $t (sort { $servers{$b}{"size"} <=> $servers{$a}{"size"} } keys %servers) { last if (! $cnt--); printf "%6d %7.2f %s\n", $servers{$t}{"size"}, $servers{$t}{"speed"}, $t; } print "\n"; under ("$TOP fastest servers"); under (" MB MB/min Server "); $cnt = $TOP; foreach my $t (sort { $servers{$b}{"speed"} <=> $servers{$a}{"speed"} } keys %servers) { last if (! $cnt--); printf "%6d %7.2f %s\n", $servers{$t}{"size"}, $servers{$t}{"speed"}, $t; } print "\n"; under ("$TOP slowest servers"); under (" MB MB/min Server "); $cnt = $TOP; foreach my $t (sort { $servers{$a}{"speed"} <=> $servers{$b}{"speed"} } keys %servers) { next if ($servers{$t}{"speed"} < 1); next if ($servers{$t}{"size"} < $MIN); last if (! $cnt--); printf "%6d %7.2f %s\n", $servers{$t}{"size"}, $servers{$t}{"speed"}, $t; } print "\n"; # Display per-tree stats under ("$TOP largest trees"); under (" MB MB/min Tree "); $cnt = $TOP; foreach my $t (sort { $trees{$b}[0]{"size"} <=> $trees{$a}[0]{"size"} } keys %trees) { last if (! $cnt--); printf "%6d %7.2f %s\n", $trees{$t}[0]{"size"}, $trees{$t}[0]{"speed"}, $t; } print "\n"; under ("$TOP fastest trees (at least $MIN sec)"); under (" MB MB/min Tree "); $cnt = $TOP; foreach my $t (sort { $trees{$b}[0]{"speed"} <=> $trees{$a}[0]{"speed"} } keys %trees) { last if (! $cnt--); printf "%6d %7.2f %s\n", $trees{$t}[0]{"size"}, $trees{$t}[0]{"speed"}, $t; } print "\n"; under ("$TOP slowest trees (at least $MIN sec, $MIN MB)"); under (" MB MB/min Tree "); $cnt = $TOP; foreach my $t (sort { $trees{$a}[0]{"speed"} <=> $trees{$b}[0]{"speed"} } keys %trees) { next if ($trees{$t}[0]{"speed"} < 1); next if ($trees{$t}[0]{"size"} < $MIN); last if (! $cnt--); printf "%6d %7.2f %s\n", $trees{$t}[0]{"size"}, $trees{$t}[0]{"speed"}, $t; } print "\n"; under ("$TOP longest running trees"); under (" MB MB/min Runtime Tree "); $cnt = $TOP; foreach my $t (sort { $trees{$b}[0]{"time"} <=> $trees{$a}[0]{"time"} } keys %trees) { last if (! $cnt--); my $ft = fmttime ($trees{$t}[0]{"time"}); $ft =~ s/^(\s*)0/$1 /; printf "%6d %7.2f %8s %s\n", $trees{$t}[0]{"size"}, $trees{$t}[0]{"speed"}, $ft, $t; } print "\n"; # Dump the raw log as well print "\n"; under ("Raw log"); open (LOG, $LOG) or die "open($LOG): $!\n"; while () { print; } close (LOG); # Done if ($EMAIL) { # Must close STDOUT first or perl will wait for sendmail to exit and # sendmail will wait for perl to close SEND close (STDOUT); close (SEND); } exit 0; ######################################################################## # Misc functions # This parses the Arkeia format date and time sub dateparse { my $date = shift; my $time = shift || "00:00:00"; my ($yr, $mon, $day) = $date =~ m!(\d+)/(\d+)/(\d+)!; die "Invalid date \"$date\"\n" if (! (defined ($yr) && defined ($mon) && defined ($day))); my ($hr, $min, $sec) = $time =~ /(\d+):(\d+):(\d+)/; die "Invalid time \"$time\"\n" if (! (defined ($hr) && defined ($min) && defined ($sec))); return timelocal ($sec, $min, $hr, $day, $mon - 1, $yr - 1900); } # Print a string and then print a row of "-" under it (underline) sub under { my $m = join ("", @_); print $m, "\n", "-" x length ($m), "\n"; } # Make a nicely formatted time string sub fmttime { my $t = shift; my $p = shift || 1; my $msg = ""; my $sec = $t % 60; $t = int ($t / 60); $msg = sprintf (":%02d", $sec); return $msg if (($p-- < 2) && ! $t); my $min = $t % 60; $t = int ($t / 60); $msg = sprintf ("%02d%s", $min, $msg); return $msg if (($p-- < 2) && ! $t); my $hr = $t % 24; $t = int ($t / 24); $msg = sprintf ("%02d:%s", $hr, $msg); return $msg if (($p-- < 2) && ! $t); $msg = sprintf ("%d:%s", $t, $msg); return $msg; } # Put commas into numbers sub comma { my @n = reverse (split (//, shift)); my $r = ""; while (@n) { $r = "," . $r if ($r !~ /^$/); $r = shift (@n) . $r; $r = shift (@n) . $r if (@n); $r = shift (@n) . $r if (@n); } return $r; } # Line wraped printout sub wprint { my $width = shift; my $indent = " " x shift; my @msg = split (/\s+/, join ("", @_)); my $started = my $len = 0; while (@msg) { my $m = shift @msg; my $l = length ($m); if ($started && (($len + $l) >= $width)) { print "\n", $indent; $len = 0; } elsif ($started) { print " "; $len ++; } print $m; $len += $l; $started = 1; } print "\n" if ($len); } # Read the Arkeia database to get info about a tape # This really should use "arkc" to fetch the info, but that requires # configuring access controls sub tapestats { my $tape = shift; # Get (and keep) the tape master list use vars qw(%TPMASTER); if (! %TPMASTER) { my $lst = readlst ($DBDIR . "/f3tape/tpmaster.lst", "NAME"); %TPMASTER = (%{$$lst{"items"}}); } die "Unknown tape \"$tape\"\n" if (! $TPMASTER{$tape}); my $tpm = $TPMASTER{$tape}; # Get (and keep) the location of the per-tape lists use vars qw(%TPLST); if (! %TPLST) { local (*D); opendir (D, $DBDIR . "/f3tape") or die "opendir($DBDIR/f3tape): $!\n"; my @tpdirs = grep { /^tpdir.+$/ } readdir (D); closedir (D); foreach my $d (@tpdirs) { my $dir = $DBDIR . "/f3tape/" . $d; opendir (D, $dir) or die "opendir($dir): $!\n"; my @tps = grep { /^tp.*\.lst$/ } readdir (D); closedir (D); foreach my $tp (@tps) { my ($id) = $tp =~ /^tp(.*)\.lst$/; $TPLST{$id} = $dir . "/" . $tp; } } } die "No tape lst file for \"$tape\"\n" if (! $TPLST{$$tpm{"TPID"}}); # Get the tape list file my $tpl = readlst ($TPLST{$$tpm{"TPID"}}); return $tpl; } # Read an Arkeia .lst file sub readlst { my $file = shift; my $field = shift; local (*F); open (F, $file) or die "open($file): $!\n"; my %ent; $ent{"top"} = {}; my $ir = $ent{"top"}; while () { next if (/^\s*$/); chomp; if (/^\s*}$/) { die "no such field \"$field\" in \"$file\"\n" if (! defined ($$ir{$field})); $ent{"items"}{$$ir{$field}} = $ir; $ir = $ent{"top"}; next; } if (/ITEM\s+{$/) { my %item = (); $ir = \%item; next; } my ($key, $val) = $_ =~ /^\s*"?([^"]+)"?\s+"(.*)"$/; $$ir{$key} = $val; } close (F); return \%ent; }