Skip to content

Instantly share code, notes, and snippets.

@gdakram
Created November 25, 2013 03:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gdakram/7635694 to your computer and use it in GitHub Desktop.
Save gdakram/7635694 to your computer and use it in GitHub Desktop.
Poor man's unique URI counter: parses an Apache access log and optionally lists the referrers of each URI.
require 'optparse'
require 'ostruct'
require 'pp'
require 'shellwords'
# Default settings. Every option the script reads is listed here so nothing
# relies on OpenStruct answering nil for an attribute that was never set.
options = OpenStruct.new :file => '', :from => nil, :to => nil,
                         :include_referrer => false, :preserve_numbers => false

# Command-line definition. A handler only overrides its default when the
# switch was actually supplied.
parser = OptionParser.new do |opts|
  opts.banner = [
    '',
    "Usage: #{__FILE__} [options]",
    '',
    'Takes an apache access log and finds the unique URI requests based on time ranges. Optionally show the referrers.',
    ''
  ].join("\n")

  opts.on '-f', '--file [FILE]', 'The apache access log file to parse' do |f|
    options.file = f unless f.nil?
  end

  # The time bounds are later interpolated into a sed /regex/ address, so the
  # "/" separators of the timestamp are backslash-escaped up front.
  opts.on '--from [TIME]', 'The time in DD/Mon/YYYY:HH:MM:SS format. Example: 22/Jan/2013:11:36:20. Defaults to the log file\'s first line time' do |t|
    options.from = t.gsub("/", '\/') unless t.nil?
  end

  opts.on '--to [TIME]', 'The time in DD/Mon/YYYY:HH:MM:SS format. Example: 22/Jan/2013:11:36:20. Defaults to the log file\'s last line time' do |t|
    options.to = t.gsub("/", '\/') unless t.nil?
  end

  opts.on '--include_referrer', 'To show the unique referrers of the URI request' do |i|
    options.include_referrer = i unless i.nil?
  end

  # Help text corrected: the flag keeps digits; masking runs of two or more
  # digits with "xxx" is what happens when the flag is absent.
  opts.on '--preserve_numbers', 'Keep digits in URIs and referrers as-is (by default runs of 2+ digits are replaced with xxx)' do |i|
    options.preserve_numbers = !i.nil?
  end
end

# Consumes the recognised switches from ARGV.
parser.parse!
# handy functions
# Echoes a shell command to stdout, runs it, and hands back what it printed.
# NOTE: being defined at the top level, this takes the place of Kernel#exec
# for receiver-less calls in this script.
#
# @param cmd [String] command line to run
# @return [String] the command's captured standard output
def exec(cmd)
  puts cmd
  captured = %x( #{cmd} )
  captured
end
# Prints msg wrapped in an ANSI SGR foreground-colour sequence.
# The closing "\033[0m" resets all attributes, so the terminal returns to its
# own default colour instead of being left with a forced white foreground.
#
# @param code [Integer] SGR foreground colour code (e.g. 32)
# @param msg [String] text to print
def print_colored(code, msg)
  puts "\033[#{code}m#{msg}\033[0m"
end

# Prints msg using SGR colour 32.
def green(msg = '')
  print_colored(32, msg)
end

# Prints msg using SGR colour 33.
def yellow(msg = '')
  print_colored(33, msg)
end

# Prints msg using SGR colour 34.
def purple(msg = '')
  print_colored(34, msg)
end

# Prints msg using SGR colour 35.
def pink(msg = '')
  print_colored(35, msg)
end
# Stop early when the log file is missing; every later step shells out to it.
unless File.exist? options.file
  raise RuntimeError, "File: '#{options.file}' doesn't exist."
end

# Quote the path once so a name containing spaces or shell metacharacters
# reaches head/tail/sed as a single literal argument.
log_path = Shellwords.escape(options.file)

# Missing bounds default to the timestamp column ($4, minus its leading "[")
# of the first / last log line, with "/" escaped for use in a sed address.
if options.from.nil?
  options.from = %x[ head -1 #{log_path} | awk '{print $4}' ].chomp.gsub("[", '').gsub("/", '\/')
end
if options.to.nil?
  options.to = %x[ tail -1 #{log_path} | awk '{print $4}' ].chomp.gsub("[", '').gsub("/", '\/')
end

# The bounds may come straight from the command line, so the whole sed
# expression is shell-escaped rather than wrapped in single quotes.
sed_range = Shellwords.escape("/#{options.from}/,/#{options.to}/p")

# Print the lines between the two timestamps, reduced to time, URI and referrer.
lines = exec "sed -n #{sed_range} #{log_path} | awk '{print $4 \" \" $7 \" \" $11}'"
# Tally hits per URI. Each entry maps a URI (query string removed) to its hit
# count and, when referrers are requested, a hash whose keys are the distinct
# referrers seen for that URI.
requests = {}
lines.split("\n").each do |line|
  # Each line is "<time> <uri> <referrer>"; the time column is not needed here.
  _time, raw_request, raw_referrer = line.split(' ')

  # Blank or truncated lines carry no URI column; skip them instead of
  # crashing on nil.
  next if raw_request.nil?

  request = raw_request.gsub(/\?.*/, '')
  # A line without a referrer column falls back to "-", the placeholder
  # Apache itself writes for an absent value.
  referrer = (raw_referrer || '-').gsub("\"", '')

  # Unless numbers are preserved, collapse runs of two or more digits so that
  # URIs differing only by an id are counted together.
  unless options.preserve_numbers
    request = request.gsub(/\d{2,}/, 'xxx')
    referrer = referrer.gsub(/\d{2,}/, 'xxx')
  end

  requests[request] ||= { 'referrer' => {}, 'count' => 0 }
  requests[request]['count'] += 1
  requests[request]['referrer'][referrer] = nil if options.include_referrer
end
# Rank the URIs from most to least requested.
keys = requests.keys.sort { |left, right| requests[right]['count'] <=> requests[left]['count'] }

# Report header followed by a coloured divider.
green "Count:\tURIs"
pink("#{'*' * 100}\n")

# One section per URI: its hit count, optionally its sorted unique referrers,
# then a plain divider.
keys.each do |uri|
  stats = requests[uri]
  green "#{stats['count']}:\t#{uri}"

  if options.include_referrer
    listing = stats['referrer'].keys.sort.map { |ref| "\t#{ref}\n" }.join('')
    purple "\tUnique Referrers:"
    yellow listing
  end

  puts "#{'*' * 100}\n"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment