Skip to content

Instantly share code, notes, and snippets.

@sobstel
Created June 12, 2011 12:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sobstel/1021497 to your computer and use it in GitHub Desktop.
Save sobstel/1021497 to your computer and use it in GitHub Desktop.
Apache log analyzer
#!/usr/bin/env ruby
# Apache log analyzer
# by Przemek Sobstel (przemek@sobstel.org)
require 'rubygems'
require 'slop'
require 'time'
# Descriptive-statistics helpers mixed into Array.
#
# All methods expect numeric elements. #quartile and #median additionally
# expect the receiver to be sorted in ascending order, because they pick
# values by position.
class Array
  # Fallback for Rubies that lack a native Array#sum. When the runtime already
  # provides one it is left untouched, so its block form keeps working for any
  # other code that relies on it.
  unless method_defined?(:sum)
    def sum
      reduce(0, :+)
    end
  end

  # Arithmetic mean as a Float. Returns NaN for an empty array (0 / 0.0).
  def mean
    sum / count.to_f
  end
  alias avg mean

  # Second quartile of a sorted array.
  def median
    quartile(2)
  end

  # Returns the n-th quartile (n in 1..3) of a sorted array, using linear
  # interpolation between the two closest ranks.
  #
  # The rank is (count - 1) * n / 4.0. When it lands exactly on an index the
  # element itself is returned; otherwise the two neighbouring elements are
  # blended into a Float. For n == 2 this is the usual median: the middle
  # element for odd sizes, the average of the two middle elements for even.
  #
  # Returns nil for an empty array.
  # Raises RuntimeError when n is not an Integer in 1..3.
  def quartile(n)
    raise "argument should be an integer within 1-3 range" unless n.is_a?(Integer) && (1..3).include?(n)
    return nil if empty?

    # Float division on purpose: integer division would truncate the rank.
    position = (count - 1) * n / 4.0
    lower = position.floor
    fraction = position - lower
    return self[lower] if fraction.zero?

    low = self[lower].to_f
    high = self[lower + 1].to_f
    low + fraction * (high - low)
  end

  # Population variance (divides by the element count, not count - 1).
  # Returns NaN for an empty array.
  def variance
    m = mean
    squared_deviations = reduce(0) { |acc, i| acc + (i - m)**2 }
    squared_deviations / count.to_f
  end

  # Population standard deviation: square root of #variance.
  def standard_deviation
    Math.sqrt(variance)
  end
end
# Scans an access log, keeps the requests that pass the configured filters and
# hands the collected serve times to a formatter.
class Analyzer
  # Default log line pattern. Named groups: time, url, status, response_size,
  # serve_time_s, serve_time_ms.
  #
  # The (?<!\d) lookbehind pins serve_time_s to the start of its digit run;
  # without it the greedy ".+" in front would swallow all but the last digit
  # of a multi-digit seconds value.
  DEFAULT_PATTERN = %r{
    \[(?<time>.+)\]\s
    "\w+\s(?<url>.+)\sHTTP/\d\.\d"\s
    (?<status>\d{3})\s
    (?<response_size>[-\d]+)
    .+ # TODO: referrer
    .+ # TODO: user agent
    (?<!\d)(?<serve_time_s>\d+)/(?<serve_time_ms>\d+)
  }x

  # Analyzes a log file and passes the result to the formatter.
  #
  # The last positional argument is expected to be an options Hash:
  #   :logfile   - path of the log file (required)
  #   :formatter - callable invoked as formatter.call(options, data) (required)
  #   :pattern   - Regexp with the named groups listed on DEFAULT_PATTERN
  #   'url'      - keep only requests whose URL equals this value
  #   'urlpart'  - keep only requests whose URL contains this value
  #   'from'     - keep only requests at or after this time (parsed by Time.parse)
  #   'to'       - keep only requests at or before this time (parsed by Time.parse)
  #
  # Only responses with status 200 are counted. Lines that do not match the
  # pattern are skipped. data is { requests: Integer, serve_times: sorted Array }.
  #
  # Any StandardError (missing file, unparsable time, ...) is reported on
  # stdout as "Error: ..." instead of propagating.
  def analyze(*args)
    # Work on a copy so the caller's hash is not modified by the defaults and
    # the parsed time values set below.
    args = args.last.is_a?(Hash) ? args.last.dup : {}
    raise "Missing logfile" unless args[:logfile]
    raise "No such file - #{args[:logfile]}" unless File.exist?(args[:logfile])
    raise "Missing formatter" unless args[:formatter]

    args[:pattern] ||= DEFAULT_PATTERN
    args['from'] = Time.parse(args['from']) if args['from']
    args['to'] = Time.parse(args['to']) if args['to']

    serve_times = []
    puts "Reading #{args[:logfile]} ..."
    File.foreach(args[:logfile]) do |line|
      matched = line.match(args[:pattern])
      next unless matched # not a request line in the expected format

      stamp = matched[:time]
      # Replace the separator between date and time-of-day ("12/Jun/2011:12:27:00")
      # with a space so Time.parse can read the timestamp.
      stamp[11] = ' '
      time = Time.parse(stamp)

      serve_times << serve_time(matched) if wanted?(matched, time, args)
    end
    puts "... done."

    serve_times.sort!
    data = { requests: serve_times.size, serve_times: serve_times }
    args[:formatter].call(args, data)
  rescue StandardError => e
    puts "Error: #{e}."
  end

  private

  # True when the request has status 200 and passes every configured filter.
  def wanted?(matched, time, args)
    return false unless matched[:status].to_i == 200
    return false if args['url'] && matched[:url] != args['url']
    return false if args['urlpart'] && !matched[:url].index(args['urlpart'])
    return false if args['to'] && time > args['to']
    return false if args['from'] && time < args['from']

    true
  end

  # Serve time as a Float: whole part from serve_time_s, with the digits of
  # serve_time_ms appended after the decimal point.
  # NOTE(review): this assumes the second number is the fractional part of the
  # first; confirm against the LogFormat that produces the log.
  def serve_time(matched)
    matched[:serve_time_s].to_f + "0.#{matched[:serve_time_ms]}".to_f
  end
end
# Plain-text report: prints the active filters followed by one
# "label => value" row per statistic to stdout.
#
# args - options hash (reads :logfile, 'url', 'urlpart', 'from', 'to')
# data - hash with :requests and :serve_times; :serve_times must respond to
#        mean, standard_deviation, quartile, median, first and last
text_formatter = lambda do |args, data|
  blank = "\n"

  # One report row: label padded to 19 columns, floats shown with 5 decimals.
  row = lambda do |title, figure|
    figure = format('%.5f', figure).rjust(2) if figure.is_a?(Float)
    "#{format('%-19s', title)} => #{figure}"
  end

  puts blank
  puts "file: #{args[:logfile]}"
  # [option key, text before the value, text after the value]
  [
    ['url', 'url: ', ''],
    ['urlpart', 'url contains: ', ''],
    ['from', 'from: ', ' '],
    ['to', 'to: ', '']
  ].each do |key, before, after|
    puts "#{before}#{args[key]}#{after}" if args[key]
  end
  puts blank

  # Each statistic is computed lazily, right before its row is printed.
  [
    ['requests', -> { data[:requests] }],
    ['mean', -> { data[:serve_times].mean }],
    ['standard deviation', -> { data[:serve_times].standard_deviation }],
    ['min', -> { data[:serve_times].first }],
    ['quartile 1', -> { data[:serve_times].quartile(1) }],
    ['quartile 2 (median)', -> { data[:serve_times].median }],
    ['quartile 3', -> { data[:serve_times].quartile(3) }],
    ['max', -> { data[:serve_times].last }]
  ].each do |title, compute|
    puts row.call(title, compute.call)
  end
  puts blank
end
# Command-line entry point: declare the options with Slop, then run the
# analyzer on the log file using the plain-text formatter.
opts = Slop.parse(:strict => true) do
# Banner. NOTE(review): printed whenever Slop evaluates this block, presumably
# on every run, before any option handling - confirm.
puts "Apache log analyzer (by przemek@sobstel.org), ver 0.1.0 (2011-06-10)\n\n"
# Filter options. NOTE(review): `:optional => false` presumably makes the
# option's argument mandatory in the Slop version this was written for - confirm.
on :u, :url, 'Exact URL to match', :optional => false
on :p, :urlpart, 'URL part to match', :optional => false
on :f, :from, 'Datetime from', :optional => false
on :t, :to, 'Datetime to', :optional => false
# -h/--help prints usage plus the option summary and exits without analyzing.
on :h, :help, 'Help' do
puts "Usage: analyzer.rb [options] logfile"
puts "Options:"
puts help
exit
end
end
# The analyzer reads the filter options through string keys ('url', 'urlpart',
# 'from', 'to'). NOTE(review): this assumes to_hash returns string keys - verify
# against the installed Slop version.
args = opts.to_hash
# The log file is taken to be the last command-line argument; with an empty
# ARGV this is nil and the analyzer reports "Missing logfile".
args[:logfile] = ARGV.last
args[:formatter] = text_formatter
Analyzer.new.analyze(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment