Last active
August 29, 2015 14:00
-
-
Save aleks-mariusz/665e92d2c24a62a64da6 to your computer and use it in GitHub Desktop.
June 1 2001 - After getting comfortable with perl by mid-2001 i tried my hand at some CGI programming (didn't someone say Perl was the glue of the web?).. This utility basically aggregates statistics generated by http-analyze in one nice central place. This was part of some work i was doing at an internet provider that did among other things, a …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
# written from scratch by Alex Koralewski | |
# time spent (so far): approximately 12 Hours (whew!) | |
# | |
# this program collects domain statistics created by http-analyze | |
# and summarizes them into one pretty page.
# | |
use CGI qw/:standard/; | |
use CGI::Carp qw/fatalsToBrowser/; | |
use Time::HiRes qw/gettimeofday tv_interval/; | |
## use POSIX qw(strftime); | |
# Create the CGI request object used for parameter access and HTML output.
# Direct method-call syntax is used instead of the ambiguous indirect-object
# form ("new CGI").
my $query = CGI->new;

################################################
# initialize date/time parameters and defaults #
################################################

# Earliest date offered in the menus, in localtime() units: years since 1900
# and a zero-based month (so 99 / 9 / 1 is 1 October 1999).
my $first_year  = 99;
my $first_month = 9;
my $first_day   = 1;

# Read the clock once so that the date fields and the footer timestamp are
# guaranteed to describe the same instant.
my $now_epoch  = time;
my @now        = localtime($now_epoch);
my $curr_year  = $now[5];    # years since 1900
my $curr_month = $now[4];    # 0 .. 11
my $curr_day   = $now[3];    # 1 .. 31

# localtime in scalar context yields a printable date string
my $now_string = localtime($now_epoch);
## $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime;
############################################
# initialize default parameters if not set #
############################################

# True when the request carried any parameter at all (i.e. the form was
# submitted); false on a plain first visit.
my $have_params = $query->param() ? 1 : 0;

# Fetch one request parameter as an integer in the range [$min, $max].
# Returns $default when the parameter is missing, is not a short run of
# digits, or falls outside the range.  The values end up in a regex, a file
# path and the page HTML, so nothing but a validated integer is let through.
my $int_param = sub {
    my ($name, $min, $max, $default) = @_;
    my $raw = $query->param($name);    # scalar context: first value only
    return $default unless defined $raw && $raw =~ /\A\d{1,4}\z/;
    return $default if $raw < $min || $raw > $max;
    return $raw + 0;                   # normalise e.g. "07" to 7
};

my $target_mode  = $int_param->('target_mode',  0,           1,          0);
my $target_year  = $int_param->('target_year',  $first_year, $curr_year, $curr_year);
my $target_month = $int_param->('target_month', 0,           11,         $curr_month);

# The monthly form has no day field: fall back to day 1 on a submitted form
# and to today's day on a parameterless visit.
my $target_day = $int_param->('target_day', 1, 31, $have_params ? 1 : $curr_day);

# Only the sizes offered in the "Amount" menu are accepted
# (keep this list in sync with the keys of %target_top_num_hash).
my %allowed_top_num = map { $_ => 1 } (5, 10, 20, 50, 100, 9999);
my $target_top_num  = $int_param->('target_top_num', 1, 9999, 10);
$target_top_num = 10 unless $allowed_top_num{$target_top_num};

# what we're looking for in the stats pages:
# the "Total" line in monthly mode, or the table row of the chosen day in
# daily mode
my $target = "Total";
if ($target_mode) {
    $target = "^<TR.*><TD ALIGN.+><B>$target_day</B>.+\$";
}
# save year as nnnn format, and month as nn format;
# $target_year counts from 1900 and $target_month is zero-based
my $the_year  = 1900 + $target_year;
my $the_month = sprintf("%.2u", ($target_month + 1));

# set up month-processing data (both tables are indexed by zero-based month)
my %month_names = (0=>"January",   1=>"February", 2=>"March",     3=>"April",
                   4=>"May",       5=>"June",     6=>"July",      7=>"August",
                   8=>"September", 9=>"October",  10=>"November", 11=>"December");
my @month_days = (31,28,31,30,31,30,31,31,30,31,30,31);

# February has 29 days in leap years; without this the 29th could never be
# offered in the day menu.
if ($the_year % 4 == 0 && ($the_year % 100 != 0 || $the_year % 400 == 0)) {
    $month_days[1] = 29;
}

my $target_month_name = $month_names{$target_month};
my $target_num_days   = $month_days[$target_month];
##########################################
# establish parameter ranges with hashes #
##########################################
# Each hash maps a form value to the label shown in the matching popup menu.

my %target_mode_hash = (0=>"Monthly", 1=>"Daily");

# years since 1900 => four-digit year, from the first year up to the current one
my %target_year_hash = map { $_ => $_ + 1900 } $first_year .. $curr_year;

# zero-based month => month name; the range is clipped so that nothing before
# the first month or after the current month can be selected
my %target_month_hash = do {
    my $from_month = ($target_year == $first_year) ? $first_month : 0;
    my $to_month   = ($target_year == $curr_year)  ? $curr_month  : 11;
    map { $_ => $month_names{$_} } $from_month .. $to_month;
};

# day => day; clipped in the same way for the first and the current month
my %target_day_hash = do {
    my $from_day = 1;
    my $to_day   = $month_days[$target_month];
    if ($target_month == $first_month && $target_year == $first_year) { $from_day = $first_day; }
    if ($target_month == $curr_month  && $target_year == $curr_year)  { $to_day   = $curr_day;  }
    map { $_ => $_ } $from_day .. $to_day;
};

# number of domains to list => label
my %target_top_num_hash = (5=>"Top 5",     10=>"Top 10",
                           20=>"Top 20",   50=>"Top 50",
                           100=>"Top 100", 9999=>"All");
###############################################
# establish parsing and traversing parameters #
###############################################

# number of lines to read after the target line to reach the pageviews line
my $pos_pageviews = 3;

# directory holding one sub-directory per domain
my $dir = "/usr/local/etc/httpd/sites";

# per-domain path of the stats page for the requested month:
# four-digit year, two-digit month, then two-digit year
my $file = join('',
    "htdocs/stats/www", $the_year,
    "/days", $the_month, sprintf("%.2u", ($target_year % 100)),
    ".html");

# page title: the daily mode adds the day to the month and year
my $title_string = $target_mode
    ? "$month_names{$target_month} $target_day, $the_year"
    : "$month_names{$target_month} $the_year";
$title_string = "Domain Statistics Summary for "
              . "$target_top_num_hash{$target_top_num} of $title_string";

# working storage for the collection pass
my @pageviews;                # list of [ domain, pageviews ] pairs
my ($sum, $total_domains, $processed_domains) = (0, 0, 0);
                              # pageviews total, domains seen, domains with pageviews
my $untouched_domains;        # domains seen minus domains with pageviews
my $line;                     # most recently read line
my $i;                        # general-purpose counter
# Emit the HTTP header, the HTML head, the title banner and the selection
# form, followed by a progress note shown while the statistics are gathered.
#
# -base takes a boolean: the original string 'false' is a true value in Perl
# and so requested a <base> tag.  A real false value is passed instead; every
# link on the page is absolute, so link resolution is unaffected.
print $query->header,
      $query->start_html(-title   => "$title_string",
                         -author  => 'Alex Koralewski',
                         -bgcolor => '#EFEFEF',
                         -base    => 0);
print <<END_HEAD;
<center>
<p>
<table width="50%" border="2" cellspacing="1" cellpadding="1">
<tr>
<td height="24" bgcolor="#CCCCCC" align="center"><font color="#000000"><b>$title_string</b></font></td>
</tr>
</table>
</p>
END_HEAD
# plain call with parentheses; the "&name;" form would pass along the current @_
print_choice_menu();
print "<font size=\"-1\">Now Processing Request<blink><b>...</b></blink></font><br>\n";
# Walk every domain directory, pull the pageview figure for the requested
# period out of its statistics page, and compute the summary numbers.

# set up a before-time..
my $time0 = [gettimeofday];

# Without a successful chdir the glob below would silently scan whatever the
# current directory happens to be, so a failure is reported.
chdir($dir) or die "Cannot change to sites directory $dir: $!\n";

my $target_re = qr/$target/;    # compile the search pattern once
my @dirs      = glob("*.*");    # domain directories all contain a dot

DOMAIN:
foreach my $current_domain (@dirs) {
    $total_domains++;

    # a domain without a stats page for this period is counted but not listed
    open(my $stats_fh, '<', "$current_domain/$file") or next DOMAIN;

    # read forward to the line that marks our target
    my $found_target = 0;
    while (my $stats_line = <$stats_fh>) {
        if ($stats_line =~ $target_re) {
            $found_target = 1;
            last;
        }
    }

    # the pageviews line sits $pos_pageviews lines below the target;
    # stop early if the file ends first
    my $data_line;
    if ($found_target) {
        for (1 .. $pos_pageviews) {
            $data_line = <$stats_fh>;
            last unless defined $data_line;
        }
    }
    close($stats_fh);

    # Use the capture only when the match succeeded; an unchecked $1 would be
    # undefined or left over from an earlier match.  Pages without usable
    # data count as zero pageviews.
    my $total_pageviews = 0;
    if (defined $data_line && $data_line =~ m#^<TD.+<B>(.+)</B>.+</TD>$#) {
        $total_pageviews = $1;
    }

    $sum += $total_pageviews;
    if ($total_pageviews > 0) { $processed_domains++; }
    push(@pageviews, [ $current_domain, $total_pageviews ]);
}

# highest pageviews first; ties are ordered by domain name so the ranking is
# deterministic
@pageviews = sort { $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] } @pageviews;

# calculate processing time..
my $elapsed_time = sprintf("%.4f", tv_interval($time0));

# calculate the domains not touched, guarding the percentages against an
# empty sites directory (division by zero)
$untouched_domains = $total_domains - $processed_domains;
my $processed_domains_pcntg = "0.00";
my $untouched_domains_pcntg = "0.00";
if ($total_domains > 0) {
    $processed_domains_pcntg = sprintf("%.2f", (($processed_domains / $total_domains)*100));
    $untouched_domains_pcntg = sprintf("%.2f", (($untouched_domains / $total_domains)*100));
}

# format this big integer nicely with commas
$sum = commify($sum);

# initialize the rank counter used when the listing is printed
$i = 1;
# Print the "Totals" summary table and open the ranking table with its header
# row.  The <b>/<font> tags in the Totals heading and in the Processing Time
# row are properly nested and balanced here.
print <<END_INTRO;
<font size="+1"><b>done!</b></font>
</p>
<table width="45%" border="2" cellspacing="1" cellpadding="1">
<tr>
<td align="center" colspan="2" bgcolor="#00CC00"><font color="#FFFFFF"><b>Totals</b></font></td>
</tr>
<tr>
<td align="left">Total Domains:</td>
<td align="center"><b>$total_domains</b></td>
</tr>
<tr>
<td align="left">Domains Shown:</td>
<td align="center"><b>$target_top_num_hash{$target_top_num}</b></td>
</tr>
<tr>
<td align="left">Domains Processed:</td>
<td align="center"><b>$processed_domains</b> ($processed_domains_pcntg %)</td>
</tr>
<tr>
<td align="left">Domains Unaccounted For:</td>
<td align="center"><b>$untouched_domains</b> ($untouched_domains_pcntg %)</td>
</tr>
<tr>
<td align="left">Total Pageviews:</td>
<td align="center"><b>$sum</b></td>
</tr>
<tr>
<td align="left">Processing Time:</td>
<td align="center"><b> $elapsed_time seconds </b></td>
</tr>
</table>
<p>
<table width="45%" border="2" cellspacing="1" cellpadding="1">
<tr>
<td align="center" bgcolor="#0000FF"><b><font color="#FFFFFF"> Rank </font></b></td>
<td align="center" bgcolor="#9900FF"><b><font color="#FFFFFF"> Domain </font></b></td>
<td align="center" bgcolor="#FF0000"><b><font color="#FFFFFF"> Pageviews </font></b></td>
</tr>
END_INTRO
# Print one ranking row per [ domain, pageviews ] pair in @pageviews, stopping
# once the requested number of rows has been written.  Each domain links to
# its own statistics page for the selected month.
my $rank = 1;    # local counter, so the loop does not rely on a distant initialisation
foreach my $listing (@pageviews) {
    my ($domain, $views) = @{$listing};
    print "<tr>\n";
    print " <td align=\"center\">$rank.</td>\n";
    print " <td align=\"center\"><a href=\"http://$domain/stats/www${the_year}/days${the_month}".
          sprintf("%.2u",($target_year % 100)).
          ".html\">$domain</a></td>\n";
    print " <td align=\"center\"><b>".commify($views)."</b></td>\n";
    print "</tr>\n";
    if (++$rank > $target_top_num) { last; }
}
# print out the footers and finish..
print "</table>\n</p>\n";
# print_choice_menu();
# Closing footer: link to the script source, copyright/contact line and the
# time the report was generated, followed by the closing page tags.
{
    my $footer_html = <<END_OUTRO;
<p>
<table border="3" width="75%" cellpadding="0" cellspacing="0">
<tr>
<td><table width="100%" cellpadding="0" cellspacing="0">
<tr>
<td nowrap align="left"><a href="/domstatsum2.txt">Domain Statistics Summary</a></td>
<td nowrap align="center">Copyright © 2000 by <a href="mailto:cynikal\@cynikal.net?subject=Domain Statistics Summary">Alex Koralewski</a></td>
<td nowrap align="right">$now_string</td>
</tr>
</table>
</td>
</tr>
</table>
</p>
</center>
</body>
</html>
END_OUTRO
    print $footer_html;
}
# Build one row of the selection table: a label cell followed by a popup menu
# that submits the form as soon as its value changes.
#   $label    - text shown in the first cell
#   $field    - form field name of the popup menu
#   $selected - value to preselect
#   $labels   - hash ref mapping each menu value to its displayed label
# Menu values are always listed in ascending numeric order, so the option
# order never depends on hash ordering.
sub _choice_menu_row {
    my ($label, $field, $selected, $labels) = @_;
    my @values = sort { $a <=> $b } keys %{$labels};
    return Tr({-align=>"center",-valign=>"middle"},[
        td({-align=>"center"} ,[ $label ,
            $query->popup_menu(-name=>$field,-values=>\@values,
                               -onChange=>'document.menu_choice.submit()',
                               -default=>"$selected",-labels=>$labels)])
    ]);
}

# Print the report selection form: Mode, Year, Month, (Day, in daily mode
# only) and Amount menus, plus a submit button.  Reads the current selection
# from the file-level $target_* variables and the menu contents from the
# %target_*_hash tables.  Takes no arguments and returns nothing.
sub print_choice_menu {
    # start_form is the counterpart of the end_form call used below
    print $query->start_form(-method=>'post',
                             -name=>'menu_choice',
                             -action=>$query->url(-absolute=>1),
                             -enctype=>"application/x-www-form-urlencoded");

    my @rows = (
        _choice_menu_row('Mode',  'target_mode',  $target_mode,  \%target_mode_hash),
        _choice_menu_row('Year',  'target_year',  $target_year,  \%target_year_hash),
        _choice_menu_row('Month', 'target_month', $target_month, \%target_month_hash),
    );
    # the day menu only makes sense when a single day is being reported
    if ($target_mode) {
        push(@rows, _choice_menu_row('Day', 'target_day', $target_day, \%target_day_hash));
    }
    push(@rows, _choice_menu_row('Amount', 'target_top_num', $target_top_num, \%target_top_num_hash));

    print $query->table({-border=>undef,-width=>'35%',-cellspacing=>1,-cellpadding=>1}, @rows);
    print $query->submit(-name=>'Re-request Report');
    print $query->end_form;
    return;
}
# Insert thousands separators into a number (technique from perlfaq5).
# Takes the number as its only argument and returns the formatted string;
# an optional leading sign is kept and anything after the leading run of
# digits (such as a decimal part) is left untouched.
sub commify {
    my $number = shift;
    # Each pass splits the last three digits off the leading digit run;
    # repeat until no run of more than three digits is left at the front.
    while ($number =~ s/^([-+]?\d+)(\d{3})/$1,$2/) {
        # nothing to do: the substitution itself is the work
    }
    return $number;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment