Skip to content

Instantly share code, notes, and snippets.

@innerfence
Created March 23, 2010 19:19
Show Gist options
  • Save innerfence/341538 to your computer and use it in GitHub Desktop.
Save innerfence/341538 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use HTTP::Status;
use Term::ANSIColor qw(:constants);
use URI;
# --- State accumulated while scanning the log -------------------------------
my %users;                # client IP => sequential visitor number
my $usernum = 1;          # next visitor number to hand out
my %count_for_referer;    # external referer host => status-200 hits so far
my %count_for_path;       # requested URI => hits so far

# --- Configuration ----------------------------------------------------------
# Only lines whose host field equals $site are shown; a false value disables
# that check.  $site is also compared with the referer host to decide whether
# a referer counts as external.
my $site = '-';

# False: requests whose user agent looks like a robot are hidden.
my $robots = 0;

# Client IPs to hide.  Entries are literal strings, matched anywhere in the
# IP field.
my @filter_ips = qw(
);

# URI prefixes to hide.  Entries are literal strings.
my @filter_uris = qw(
    /images/
    /assets/
    /favicon.ico
    /ext/
);

# Alternation of the filtered IPs.  Each entry is passed through quotemeta so
# that the dots in an address match only a literal dot.
my $ips_re = do {
    my $ips_piped = join '|', map { quotemeta } @filter_ips;
    qr{$ips_piped}xms;
};

# Alternation of the filtered URI prefixes, anchored at the start of the URI.
# Entries are quotemeta'd so that e.g. the dot in "/favicon.ico" is literal.
my $uris_re = do {
    my $uris_piped = join '|', map { quotemeta } @filter_uris;
    qr{\A (?:$uris_piped)}xms;
};
# Browser lookup table.  Every entry is [ display name, compiled pattern ];
# the patterns are case-insensitive.  Order matters: the table is scanned
# from the top and the first matching entry is used, so the more specific
# combinations are listed before the bare platform names.
my @user_agents = map {
    my ( $label, $needle ) = @{$_};
    [ $label => qr{$needle}xmsi ];
} (
    [ 'iPhone'          => 'iphone' ],
    [ 'Windows IE'      => 'msie.*windows' ],
    [ 'Windows Firefox' => 'windows.*firefox' ],
    [ 'Windows'         => 'windows' ],
    [ 'Mac Firefox'     => 'macintosh.*firefox' ],
    [ 'Mac Safari'      => 'macintosh.*safari' ],
    [ 'Mac'             => 'macintosh' ],
);
# Case-insensitive pattern that matches a user-agent string containing any of
# the substrings listed below.  It is used to decide whether a request is
# treated as coming from a robot.
my $bots_re = do {
    my @bot_markers = qw(
        picnik
        wget
        getright
        libwww-perl
        lwp-
        yahoo
        altavista
        lycos
        infoseek
        lecodechecker
        slurp
        google
        bot
        spider
        crawl
        agent
        seek
        search
        reap
        worm
        find
        index
        copy
        fetch
        ia_archive
        zyborg
        pingdom
    );
    my $alternation = join q{|}, @bot_markers;
    qr{$alternation}xmsi;
};
# Main loop: read access-log lines from the files named on the command line
# (or from standard input), drop the lines excluded by the filters, and print
# a short colourised summary for every remaining request:
#
#   visitor number, date/time, client IP and browser
#   the response status, unless it is a success status
#   the referer, if any, with a running hit count for external hosts
#   the requested URI with its running hit count
LINE:
while ( my $line = <> ) {
    my ( $ip, $host, $date, $time, $http_request, $rc, $referer, $user_agent )
        = $line =~ m{
            \A
            (\S+)                   # Client IP
            \s+
            (\S+)                   # Host
            \s+
            \S+                     # Username
            \s+
            \[(.*?):(.*?)[ ]\S+\]   # Date/Time/Timezone
            \s+
            "(.*?)"                 # HTTP request
            \s+
            (\S+)                   # Response code
            \s+
            \S+                     # Response size
            \s+
            "(.*?)"                 # Referer
            \s+
            "(.*?)"                 # User Agent
            \s*
            \z
        }xms;

    # A line that is not in the expected format leaves every capture
    # undefined.  Skip it explicitly rather than carry undefined values
    # through the comparisons below.
    next LINE if !defined $ip;

    # The request is normally "VERB URI VERSION"; only the URI is used.
    my ( $uri ) = $http_request =~ m{
        \A
        \S+     # Verb
        \s+
        (\S+)   # URI
        \s+
        \S+     # Version
        \s*
        \z
    }xms;

    # A request that does not split into three fields would leave $uri
    # undefined; show and count the raw request string instead.
    $uri = $http_request if !defined $uri;

    # Filters.  The bot test is evaluated once and reused for the output.
    my $is_bot = ( $user_agent =~ m{$bots_re}xms );
    next LINE if $is_bot && !$robots;
    next LINE if $site && ( $host ne $site );
    next LINE if @filter_ips  && ( $ip  =~ m{$ips_re}xms );
    next LINE if @filter_uris && ( $uri =~ m{$uris_re}xms );

    # Referer: split into host and path, highlight a Google search query,
    # and keep a running count per external host.
    my $no_referer = ( $referer eq '-' );
    my $refer_count;
    my ( $refer_host, $refer_path, $refer_print_host );
    if ( !$no_referer )
    {
        my $refer_uri = URI->new( $referer );

        # Not every URI object has these methods; fall back to an empty
        # string when the call dies.
        $refer_host = eval { $refer_uri->host }       || q{};
        $refer_path = eval { $refer_uri->path_query } || q{};

        # The host is only printed when it differs from $site.
        $refer_print_host = ( $refer_host ne $site );

        if ( $refer_host =~ m{google[.]}xms )
        {
            # Highlight the value of the q= parameter.
            $refer_path =~ s{(?<=[?&]q=)(.*?)(&|\z)}{@{[join '', BOLD, BLUE]}$1@{[RESET]}$2}xms;
        }

        if ( length $refer_host && $refer_print_host )
        {
            # Only responses with status 200 increase the count.
            $count_for_referer{$refer_host}++ if $rc == 200;
            $refer_count = $count_for_referer{$refer_host} || 0;
        }
    }

    # Browser label: robots show their user agent (shortened for two known
    # crawlers); everything else is looked up in @user_agents, falling back
    # to the leading word of the user agent, then to 'Unknown'.
    my $browser = q{};
    if ( $is_bot )
    {
        $browser = $user_agent;
        if ( $user_agent =~ m{(Googlebot|Yahoo![ ]Slurp)}xms )
        {
            $browser = $1;
        }
    }
    else
    {
        for my $ua_ref ( @user_agents )
        {
            my ( $name, $pattern ) = @{$ua_ref};
            if ( $user_agent =~ m{$pattern}xms )
            {
                $browser = $name;
                last;
            }
        }
        if ( !length $browser )
        {
            ( $browser ) = $user_agent =~ m{\A (\w+)}xms;
            $browser ||= 'Unknown';
        }
    }

    # Count the hit before the URI is decorated with colour codes below.
    my $uri_count = do {
        # my $path = URI->new( $uri )->path;
        # $path =~ s{(.)/\z}{$1}xms;
        # ++$count_for_path{$path};
        ++$count_for_path{$uri};
    };

    # Highlight a ref=... parameter in the requested URI.
    $uri =~ s{ref=(.*?)(&|\z)}{@{[join '', BOLD, BLUE]}ref=$1@{[RESET]}$2}xms;

    # Visitors are numbered in order of first appearance of their IP.
    my $user = ( $users{$ip} ||= $usernum++ );

    print BOLD,
        sprintf( '%-6d ', $user ),
        sprintf( '%-25s ', "$date $time" ),
        sprintf( '%-20s ', $ip ),
        $is_bot ? ( BLUE ) : (),
        sprintf( '%-15s ', $browser ),
        RESET,
        "\n";

    # Non-success statuses get their own line: red for errors, green for
    # everything else.
    if ( !is_success( $rc ) )
    {
        print BOLD, ( is_error( $rc ) ? RED : GREEN ), ' -> ', $rc, ' ', status_message( $rc ), RESET, "\n";
    }

    # The referer line is printed when there is something to show: the host
    # for external referers, the path otherwise.
    if ( !$no_referer && ( $refer_print_host ? length $refer_host : length $refer_path ) )
    {
        print ' <- ', ( $refer_print_host ? ( "($refer_count) ", BOLD, BLUE, $refer_host, RESET ) : () ), RESET, $refer_path, "\n";
    }

    # print $no_referer ? ( BOLD, BLUE ) : (), ' -> ', RESET, "($uri_count) $uri\n";
    print " -> ($uri_count) $uri\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment