Last active
September 24, 2016 14:04
-
-
Save SergeyStorm/81f01ba7486b3d7e82a0f5a2887e014b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Extract the unique URLs from a Squid access log whose URL field matches a
# regular expression.
#
# Usage: <script> [URL_REGEXP [LOG_FILE]]
#   URL_REGEXP  Perl regular expression tested against the URL field of each
#               record; defaults to '.*' (every URL).
#   LOG_FILE    log file to read; defaults to /var/log/squid/access.log.
#
# The sorted unique URLs are written to STDOUT, one per line.  Progress and
# the final counters are written to STDERR so that they never mix with the
# URL list when STDOUT is redirected.
use strict;
use warnings;

$| = 1;

# Index of the URL in a whitespace-split record.
my $URL_FIELD = 6;

# Number of input lines that must pass between two progress updates.
my $report_rate = 99;

# Only treat an argument as given when it is defined and non-empty, so that
# a pattern such as "0" is honoured instead of being mistaken for "absent".
my $pattern = ( defined $ARGV[0] and length $ARGV[0] )
    ? $ARGV[0]
    : '.*';
my $sqacclog = ( defined $ARGV[1] and length $ARGV[1] )
    ? $ARGV[1]
    : '/var/log/squid/access.log';

# Compile the pattern once, up front: a malformed expression is reported
# before any input is read, and the loop does not recompile it per line.
my $regexp = eval { qr/$pattern/ };
if ( not defined $regexp ) {
    die "Invalid URL regexp '$pattern': $@";
}

open my $log, '<', $sqacclog
    or die "Can't open squid log '$sqacclog': $!";

my $tcount = 0;    # records that carry a URL field
my $mcount = 0;    # records whose URL matched the pattern
my $iter   = 0;    # lines read since the last progress update
my %urls;          # set of matching URLs (keys only)

while ( my $line = <$log> ) {
    chomp $line;
    my @fields = split /\s+/, $line;
    $iter++;

    # Blank or truncated lines have no URL field; testing an undefined value
    # would count them as hits for patterns like '.*' and add an empty URL.
    next if $#fields < $URL_FIELD;
    $tcount++;

    my $url = $fields[$URL_FIELD];
    next if $url !~ $regexp;

    $mcount++;
    $urls{$url} = 1;

    # Throttle progress output: it is only refreshed on a matching record
    # once more than $report_rate lines have been read since the last one.
    if ( $iter > $report_rate ) {
        print STDERR "\r$mcount of $tcount matched";
        $iter = 0;
    }
}
close $log;

# Always finish with the final tally.  The leading "\r" overwrites any
# progress text already on the line instead of being appended to it; the
# counters only grow, so the new text is never shorter than the old one.
print STDERR "\r$mcount of $tcount matched\n";

my @list = sort keys %urls;
print "$_\n" for @list;

print STDERR "\n$mcount regexp hits\n";
print STDERR scalar( @list )." unique URLs\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment