#!/cs/local/bin/perl
#
# divs by zero pointed out by Stig Sandbeck Mathisen <ssm@online.no>
#   fixed 1997/09/05 (BWE)

# Default log to process; an optional command-line argument overrides it.
$LOGFILE = '/var/log/news/binary.log';

# Getopt::Std is the core-module replacement for the Perl 4 library
# getopts.pl, which current Perl releases no longer ship.
use Getopt::Std;

# Options:
#   -n  do not rotate; work on the log file exactly as named
#   -q  quit after the (optional) rotation without reading the log
my %opts;
getopts( 'nq', \%opts ) || die "usage: $0 [-nq] [logfile]\n";
if( @ARGV ) { $LOGFILE = $ARGV[0] }

if( ! $opts{n} ) {
  # Shift the saved generations up by one, highest number first so that no
  # generation is overwritten before it has been moved: .3 -> .4 ... .0 -> .1
  for my $gen ( reverse 0 .. 3 ) {
    next unless -f "$LOGFILE.$gen";
    rename( "$LOGFILE.$gen", "$LOGFILE." . ($gen+1) )
      || warn "$0: cannot rename $LOGFILE.$gen: $!\n";
  }

  # The live log becomes generation .0, and that is the file analysed below.
  rename( $LOGFILE, "$LOGFILE.0" )
    || warn "$0: cannot rename $LOGFILE to $LOGFILE.0: $!\n";
  $LOGFILE .= ".0";
}
exit if $opts{q};

# Accumulators filled while reading the log:
#   $totalArticles / $totalBytes   count and size over all article lines
#   %byTypeArticles / %byTypeBytes the same, keyed by the type field
#   %byGroupArticles / %byGroupBytes the same, keyed by purged group name
$totalArticles = 0; $totalBytes = 0;
%byTypeArticles = (); %byTypeBytes = ();
%byGroupArticles = (); %byGroupBytes = ();

# Size taken from the most recent article line; every following "purged"
# line is charged with this size.
my $bytes = 0;

# Three-argument open keeps a user-supplied file name from being parsed as
# part of the open mode.
open( my $log, '<', $LOGFILE ) || die "$0: cannot read $LOGFILE: $!\n";
while( my $line = <$log> ) {
  if( $line =~ m|^\S+/\d+; (\d+) bytes; (\S+)| ) {
    # Article line: "<name>/<number>; <size> bytes; <type>"
    my( $size, $type ) = ( $1, $2 );
    $bytes = $size;
    $totalArticles++; $totalBytes += $size;
    $byTypeArticles{$type}++; $byTypeBytes{$type} += $size;
  } elsif( $line =~ m|^\s+(\S+)/\d+ purged| ) {
    # Indented line: "<group>/<number> purged"
    my $group = $1;
    # alt/binaries* and junk* are left out of the per-group statistics.
    next if $group =~ /^alt\/binaries|^junk/;
    $byGroupArticles{$group}++; $byGroupBytes{$group} += $bytes;
  }
}
close $log;

# Overall totals.  The average is printed only when there is at least one
# article (avoids dividing by zero); the line is terminated in either case
# so the following output always starts on a fresh line.
printf "TOTAL: %d articles, %dkB", $totalArticles, $totalBytes/1024;
if( $totalArticles > 0 ) {
  printf " (%dkB avg size)", $totalBytes/$totalArticles/1024;
}
print "\n";

# Groups that had at least one counted "purged" entry.
@groups = keys( %byGroupArticles );
printf "  in %d newsgroups\n", scalar( @groups );

# Per-type breakdown, sorted by type name so the output is reproducible.
# The type is passed as a %s argument rather than interpolated into the
# format, so a '%' in the logged type cannot be read as a conversion.
@types = sort keys( %byTypeArticles );
foreach my $t (@types) {
  printf "  of type %s: %d articles, %dkB\n",
    $t, $byTypeArticles{$t}, $byTypeBytes{$t}/1024;
}

# top_group_lines( $limit, \%bytes_for, \%articles_for [, $pattern] )
#
# Returns a list of at most $limit formatted report lines, one per group,
# ordered by descending byte count; groups with equal byte counts are
# ordered by name so the result is reproducible.
#
#   $limit         maximum number of lines to return
#   \%bytes_for    group name => total bytes
#   \%articles_for group name => number of articles
#   $pattern       optional compiled regex; when given, only group names
#                  matching it are considered
#
# Groups with no bytes or no articles are never listed.  Group names are
# shown with '/' replaced by '.'.
sub top_group_lines {
  my( $limit, $bytes_for, $articles_for, $pattern ) = @_;

  my @ranked =
    sort { $bytes_for->{$b} <=> $bytes_for->{$a} || $a cmp $b }
    grep { ( $bytes_for->{$_} || 0 ) > 0 && ( $articles_for->{$_} || 0 ) > 0 }
    grep { !defined( $pattern ) || $_ =~ $pattern }
    keys %$articles_for;
  splice( @ranked, $limit ) if @ranked > $limit;

  my @lines;
  foreach my $group (@ranked) {
    ( my $dotted = $group ) =~ tr[/][.];
    # The name goes in as a %s argument, never into the format itself, so a
    # '%' in a group name cannot be read as a conversion.
    push @lines, sprintf( "  %8dkB %s (%d articles, %dkB avg size)\n",
      $bytes_for->{$group}/1024, $dotted, $articles_for->{$group},
      $bytes_for->{$group}/$articles_for->{$group}/1024 );
  }
  return @lines;
}

print "Top-20 'Hidden Binaries' Groups\n";
print top_group_lines( 20, \%byGroupBytes, \%byGroupArticles );

# Same ranking, restricted to the listed top-level hierarchies.
print "Top-10 'Hidden Binaries' Big-N Groups\n";
print top_group_lines( 10, \%byGroupBytes, \%byGroupArticles,
  qr{^(comp|humanities|misc|news|rec|sci|soc|talk)/} );

# EOF

