Skip to content

Instantly share code, notes, and snippets.

@nitoyon
Created April 25, 2012 17:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nitoyon/2491537 to your computer and use it in GitHub Desktop.
Save nitoyon/2491537 to your computer and use it in GitHub Desktop.
Parse an HTTP access log and find the web font download ratio.
use strict;
use warnings;
use HTTP::BrowserDetect;
use Data::Dumper;
use DateTime::Format::HTTP;
use DateTime::Duration;
# Script-wide state.
#   $browser       - HTTP::BrowserDetect object for the user agent most recently classified
#   $total         - number of log lines that were parsed and classified
#   $summary       - classification key => { none => N, none_queue => [timestamps], FONT => N, ... }
#   $hosts         - not used by the code below; kept so the file-level name stays declared
#   $browser_cache - normalised user-agent string => classification key
my $browser = HTTP::BrowserDetect->new("");
my $total = 0;
my $summary = {};
my $hosts = {};
my $browser_cache = {};

# Read the access log from STDIN one line at a time.  Every request is
# attributed to a "device,os,browser,version" key.  A request that is not a
# font download increments that key's "none" counter and queues its
# timestamp; a later font download by the same key that arrives within 30
# seconds of a queued timestamp moves one count from "none" to that font.
LINE:
while (my $line = <STDIN>) {
    my $result = parseLine($line);
    next LINE unless $result;

    # Strip the suffix appended by the Twitter iPhone app before the UA is
    # used for anything, so the cache is looked up and filled under the
    # same string.
    my $ua = $result->{ua};
    $ua = $1 if $ua =~ /^(.*) Twitter for iPhone$/;

    my $key = $browser_cache->{$ua};
    if (!defined $key) {
        # One detector object per distinct UA string; the result is cached,
        # so this happens once per UA rather than once per log line.
        $browser = HTTP::BrowserDetect->new($ua);
        next LINE unless $browser->browser_string();
        next LINE if $browser->robot();

        my $os = $browser->os_string() // "";
        $os = "iOS $1" if $os eq "iOS" and $ua =~ /OS ([0-9_]+)/;
        $os = "Windows" if $browser->windows();
        $os = $1 if $browser->android() and $ua =~ /Android ([0-9\.]+)/;
        $os =~ s/_/./g;    # iOS versions appear as 5_1 in the UA string

        my $browser_version = $browser->public_version() // "";
        $browser_version = $1
            if ($browser->safari() or $browser->mobile_safari())
            and $ua =~ m!AppleWebKit/([\d\.]+)!;

        $key = sprintf("%s,%s,%s,%s",
            $browser->device_name() // "PC",
            $os,
            $browser->browser_string(),
            $browser_version);
        $browser_cache->{$ua} = $key;
    }

    my $stats = $summary->{$key} //= { none => 0, none_queue => [] };

    my $url = $result->{url};
    $url =~ s/\?|'$//;

    if ($url eq "none") {
        # Not a font download: remember when it happened so a following
        # font download can be paired with it.
        $stats->{none}++;
        push @{ $stats->{none_queue} }, $result->{time};
    }
    elsif ($stats->{none} > 0) {
        # Compare absolute times.  DateTime::Duration->seconds is only the
        # seconds remainder of the difference, so it cannot be used to test
        # "within 30 seconds".
        my $now = $result->{time}->epoch;
        while (defined(my $seen = shift @{ $stats->{none_queue} })) {
            if (abs($now - $seen->epoch) <= 30) {
                $stats->{$url}++;
                $stats->{none}--;
                last;
            }
            # Queue entries further than 30 seconds away are dropped.
        }
    }

    $total++;
    #last if $total >= 100;
}
# Fold the per-user-agent counters in $summary into per-font totals.
#   $count       - font label (or "none") => { count => total, ua => { key => n } }
#   $total_count - sum of every positive counter, used as the percentage base
my $count = {};
my $total_count = 0;
for my $ua (sort keys %$summary) {
    for my $font (sort keys %{ $summary->{$ua} }) {
        next if $font eq "none_queue";    # bookkeeping list, not a counter
        $count->{$font} //= { count => 0, ua => {} };

        my $hits = $summary->{$ua}{$font};
        next unless $hits > 0;

        $count->{$font}{count} += $hits;
        $count->{$font}{ua}{$ua} += $hits;
        $total_count += $hits;
    }
}

# Detail rows: "device,os,browser,version,FONT,hits".  The font label is
# passed as an argument, never placed in the format string, so a label
# containing '%' cannot be interpreted as a conversion.
for my $font (sort keys %$count) {
    for my $ua (sort keys %{ $count->{$font}{ua} }) {
        printf("%s,%s,%d\n", $ua, $font, $count->{$font}{ua}{$ua});
    }
}

# per font summary
for my $font (sort keys %$count) {
    my $hits = $count->{$font}{count};
    # Every counter can be zero (for example when only unmatched font
    # requests were seen); report 0% instead of dividing by zero.
    my $percent = $total_count ? $hits / $total_count * 100 : 0;
    printf("%s %d (%f%%)\n", $font, $hits, $percent);
}
# Parse one access-log line of the form
#   host ident user [time] "request" status bytes "referer" "user-agent"
#
# Returns nothing (undef in scalar context) when the line does not match
# that layout, when the user-agent field is empty, or when the timestamp
# cannot be parsed.  Otherwise returns a hash reference with:
#   time - value returned by DateTime::Format::HTTP->parse_datetime
#   host - first field of the line
#   url  - upper-cased text following "ciphered-mplus." in a GET request
#          for /misc/js/uncopyable/, or the string 'none' for anything else
#   ua   - the user-agent field
sub parseLine {
    my ($line) = @_;

    my ($host, $time, $request, $agent) = $line =~ m{
        ^ ([^ ]*)             # host
        [ ] [^ ]*             # ident
        [ ] [^ ]*             # user
        [ ] \[ ([^]]*) \]     # timestamp
        [ ] " (.*?) "         # request line
        [ ] [^ ]*             # status
        [ ] [^ ]*             # bytes
        [ ] " .*? "           # referer
        [ ] " (.*?) "         # user agent
    }x;
    return unless defined $agent and length $agent;

    my $url = 'none';
    # The dot is escaped so only "ciphered-mplus.<suffix>" counts as a font.
    $url = uc($1) if $request =~ m!^GET /misc/js/uncopyable/ciphered-mplus\.([^ ]+)!;

    # A malformed timestamp makes this one line unusable; report it and let
    # the caller skip the line rather than aborting the whole run.
    my $when = eval { DateTime::Format::HTTP->parse_datetime($time) };
    if (!defined $when) {
        warn "skipping line with unparseable time '$time'\n";
        return;
    }

    return {
        time => $when,
        host => $host,
        url  => $url,
        ua   => $agent,
    };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment