Skip to content

Instantly share code, notes, and snippets.

@antonlindstrom
Created May 17, 2009 19:04
Show Gist options
  • Save antonlindstrom/113108 to your computer and use it in GitHub Desktop.
Save antonlindstrom/113108 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
#
# Collecting data from access.log and
# presenting data as top 10 lists.
#
# The lists are: IP addresses, domains,
# files, months, hours and browsers.
#
# When no domain exists for an IP address, the IP is printed instead.
#
# Author Anton Lindström
# me@antonlindstrom.com
use strict;
use warnings;
use HTML::ParseBrowser;
# Path of the Apache access log to analyse.
my $accessfile = "access.log";

# Hit counters filled by the parsing loop and printed by the report
# section.  Each hash maps a key to the number of log lines seen for it.
my %ipadresses;    # keyed by client IP address
my %efiles;        # keyed by requested file
my %pmonth;        # keyed by the month field of the timestamp
my %phours;        # keyed by hour, stored as "<hour>:00"
my %browsers;      # keyed by browser name
my %pdomains;      # keyed by reverse-resolved domain

# Read the whole logfile into memory.  A lexical handle with three-argument
# open keeps the filename from being interpreted as a mode, and the script
# stops with a clear message instead of silently reporting on an empty
# list when the log is missing or unreadable.
open(my $access_fh, '<', $accessfile)
    or die "Cannot open '$accessfile' for reading: $!\n";
my @access = <$access_fh>;
close($access_fh);

print "Collecting data, hold on..\n\n";
# Reverse-DNS answers already obtained, keyed by IP, so that `host` is run
# once per distinct address rather than once per log line.
my %domain_for;

foreach my $line (@access) {
    # Pull the client IP, timestamp, requested path and user agent out of
    # one log line.  Lines that do not match are skipped: without this
    # check the captures would be undefined and the line would be tallied
    # under bogus keys.  The referer field may hold any quoted value, not
    # only "-", so requests that arrived via a link are counted as well.
    next unless $line =~ m/(^[0-9\.]+)[\s\-]+\[(.+)\]\s\"[a-z]+\s(.+)\sHTTP\/[\d.]+\"[\s\d]+\"[^\"]*\"\s\"(.+)\"\s(\".*)?/i;
    my ($ip, $datetime, $file, $browser) = ($1, $2, $3, $4);

    # The timestamp is expected as "day/month/year:hour:minute:second ...";
    # only the month and the hour are needed for the peak-time lists.
    my (undef, $month, $rest) = split(/\//, $datetime);
    next unless defined $rest;
    my (undef, $hour) = split(/:/, $rest);
    next unless defined $hour;

    # Identify the browser from the user-agent string of this request.
    my $ua = HTML::ParseBrowser->new($browser);
    my $browsername = $ua->name;
    # Unrecognised agents get a visible label rather than an undef key.
    $browsername = 'Unknown' unless defined $browsername;

    # Tally this hit in every list; ++ creates a missing key with value 1.
    $ipadresses{$ip}++;
    $efiles{$file}++;
    $pmonth{$month}++;
    $phours{"$hour:00"}++;
    $browsers{$browsername}++;

    # Domain lookup.  When `host` reports no "pointer" record (or cannot
    # be run at all) the IP itself is used as the domain key.
    my $dom = $domain_for{$ip};
    if (!defined $dom) {
        $dom = $ip;
        # List-form pipe open runs `host` directly, without a shell.
        if (open(my $host_fh, '-|', 'host', $ip)) {
            my $reply = do { local $/; <$host_fh> };
            close($host_fh);
            if (defined $reply && $reply =~ m/pointer\s(.+)/) {
                $dom = $1;
            }
        }
        $domain_for{$ip} = $dom;
    }
    $pdomains{$dom}++;
}
# Print a heading followed by at most ten entries of a counter hash, highest
# count first, one per line in the form "<tab>count<tab> key".
#
#   $heading - text printed verbatim before the list (including newline)
#   $counts  - reference to a hash mapping key => number of hits
#
# Entries with equal counts are ordered by key so the output is stable
# from run to run.
sub print_top10 {
    my ($heading, $counts) = @_;

    print $heading;

    my $shown = 0;
    foreach my $key (sort { $counts->{$b} <=> $counts->{$a} or $a cmp $b }
                     keys %$counts) {
        # Stop after ten lines instead of walking the remaining keys.
        last if $shown == 10;
        print "\t$counts->{$key}\t $key\n";
        $shown++;
    }
    return;
}

# Most frequent domains (falls back to the IP when no domain was found).
print_top10("Most frequent visitors (domain):\n", \%pdomains);

# Most frequent IP addresses.
print_top10("Most frequent visitors (IP):\n", \%ipadresses);

# Most frequently accessed files.
print_top10("Most popular files:\n", \%efiles);

# Peak months.
print_top10("Most popular month to access site:\n", \%pmonth);

# Peak hours.
print_top10("Most popular hour to access site:\n", \%phours);

# Most used browsers.
print_top10("Top 10 Browsers:\n", \%browsers);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment