Skip to content

Instantly share code, notes, and snippets.

@sobstel sobstel/analyzer.rb
Created Jun 12, 2011

Embed
What would you like to do?
Apache log analyzer
#!/usr/bin/env ruby
# Apache log analyzer
# by Przemek Sobstel (przemek@sobstel.org)
require 'rubygems'
require 'slop'
require 'time'
# Descriptive-statistics helpers added to the core Array class.
#
# The positional helpers (+median+, +quartile+) pick elements by index, so they
# only give meaningful results when the receiver is already sorted ascending.
class Array
  # Adds up all elements, starting from 0.
  #
  # Defined only when the running Ruby has no Array#sum of its own, so the
  # native method (which also accepts an initial value and a block) is not
  # replaced by this narrower version.
  unless method_defined?(:sum)
    def sum
      reduce(0, :+)
    end
  end

  # Arithmetic mean as a Float. An empty array yields NaN (0 / 0.0).
  def mean
    sum / count.to_f
  end
  alias avg mean

  # Second quartile of a sorted array; see #quartile.
  def median
    quartile(2)
  end

  # Returns the n-th quartile of a sorted array.
  #
  # For an odd number of elements the element at index floor(count * n / 4)
  # is returned as-is; for an even number the Float average of that element
  # and its left neighbour is returned.
  #
  # @param n [Integer] quartile number, 1 to 3
  # @return [Object, Float, nil] nil when the array is empty
  # @raise [RuntimeError] when n is not an Integer in 1..3
  def quartile(n)
    raise "argument should be fixnum within 1-3 range" unless n.is_a?(Integer) && (1..3).include?(n)
    return nil if empty?

    # Multiply before dividing: (count / 4) * n would truncate first and
    # land on the wrong index whenever count is not a multiple of 4.
    boundary = (count * n) / 4
    return self[boundary] if count.odd?

    # Clamp the left neighbour so a boundary of 0 does not wrap around to the
    # last element through a negative index.
    lower = [boundary - 1, 0].max
    (self[lower].to_f + self[boundary].to_f) / 2
  end

  # Population variance: the mean of squared deviations from the mean
  # (divides by count, not count - 1).
  def variance
    m = mean
    squared_deviations = reduce(0) { |acc, i| acc + (i - m)**2 }
    (1 / count.to_f * squared_deviations)
  end

  # Square root of #variance.
  def standard_deviation
    Math.sqrt(variance)
  end
end
# Reads an access log line by line and collects the serve times of the
# requests that pass the configured filters.
class Analyzer
  # Scans the log file and hands the collected samples to the formatter.
  #
  # Only the last positional argument is used, and only if it is a Hash:
  #   :logfile   - path of the log file (required)
  #   :formatter - object called as formatter.call(args, data) (required)
  #   :pattern   - Regexp with named groups time, url, status, serve_time_s
  #                and serve_time_ms; a built-in pattern is stored under this
  #                key when none is given
  #   'url'      - keep only requests whose URL equals this string
  #   'urlpart'  - keep only requests whose URL contains this string
  #   'from'     - lower time bound, parsed with Time.parse and written back
  #   'to'       - upper time bound, parsed with Time.parse and written back
  #
  # Only lines with status 200 are counted. Lines that do not match the
  # pattern are skipped. The formatter receives
  # { requests: Integer, serve_times: Array<Float> sorted ascending }.
  #
  # Any StandardError raised along the way (including the validation errors
  # below) is reported on stdout as "Error: <message>." instead of propagating.
  #
  # @return the formatter's return value, or nil after an error
  def analyze(*args)
    args = args.last.is_a?(Hash) ? args.last : {}
    raise "Missing logfile" unless args[:logfile]
    raise "No such file - #{args[:logfile]}" unless File.exist?(args[:logfile])
    raise "Missing formatter" unless args[:formatter]

    args[:pattern] ||= default_pattern
    args['from'] = Time.parse(args['from']) if args['from']
    args['to'] = Time.parse(args['to']) if args['to']

    serve_times = []
    puts "Reading #{args[:logfile]} ..."
    File.foreach(args[:logfile]) do |line|
      matched = line.match(args[:pattern])
      # A line in some other format must not abort the whole run.
      next unless matched

      vars = matched.names.each_with_object({}) { |name, fields| fields[name.to_sym] = matched[name] }
      # Replace the character at index 11 (the ':' between date and time in
      # "10/Jun/2011:13:55:36 +0200") so that Time.parse accepts the stamp.
      vars[:time][11] = ' '
      time = Time.parse(vars[:time])
      next unless selected?(vars, time, args)

      serve_times << serve_time(vars)
    end
    puts "... done."

    serve_times.sort!
    data = { requests: serve_times.size, serve_times: serve_times }
    args[:formatter].call(args, data)
  rescue => e
    puts "Error: #{e}."
  end

  private

  # Builds the pattern used when the caller supplies none.
  def default_pattern
    Regexp.new(
      '\[(?<time>.+)\]\s' +
      '"\w+\s(?<url>.+)\sHTTP\/\d\.\d"\s' +
      '(?<status>\d{3})\s' +
      '(?<response_size>[-\d]+)' +
      '.+' + # @todo: referrer
      '.+' + # @todo: user agent
      # The lookbehind stops the greedy ".+" above from swallowing the leading
      # digits of the seconds value (otherwise "12/25" is captured as "2/25").
      '(?<!\d)(?<serve_time_s>\d+)\/(?<serve_time_ms>\d+)'
    )
  end

  # Tells whether a parsed line has status 200 and passes every filter set in args.
  def selected?(vars, time, args)
    vars[:status].to_i == 200 &&
      (!args['url'] || vars[:url] == args['url']) &&
      (!args['urlpart'] || vars[:url].index(args['urlpart'])) &&
      (!args['to'] || time <= args['to']) &&
      (!args['from'] || time >= args['from'])
  end

  # Combines the two captured numbers into one Float.
  # NOTE(review): the second number is appended after "0." verbatim, so "5" is
  # read as 0.5 - confirm the log writes it as a zero-padded fraction.
  def serve_time(vars)
    vars[:serve_time_s].to_f + "0.#{vars[:serve_time_ms]}".to_f
  end
end
# Plain-text report writer, called as text_formatter.call(args, data).
#
# Prints the log file name, whichever filters are set in args ('url',
# 'urlpart', 'from', 'to'), and then one "label => value" line per statistic
# taken from data[:requests] and data[:serve_times]. Float values are shown
# with five decimals; labels are left-aligned in a 19-character column.
text_formatter = lambda do |args, data|
  blank = "\n"

  # Renders a single "label => value" row.
  render = lambda do |label, value|
    shown = value.is_a?(Float) ? format('%.5f', value).rjust(2) : value
    "#{format('%-19s', label)} => #{shown}"
  end

  # [args key, text before the value, text after the value]
  filters = [
    ['url', 'url: ', ''],
    ['urlpart', 'url contains: ', ''],
    ['from', 'from: ', ' '],
    ['to', 'to: ', '']
  ]

  puts blank
  puts "file: #{args[:logfile]}"
  filters.each do |key, before, after|
    puts "#{before}#{args[key]}#{after}" if args[key]
  end
  puts blank

  # Each value is computed right before its row is printed, in this order.
  rows = [
    ['requests', lambda { data[:requests] }],
    ['mean', lambda { data[:serve_times].mean }],
    ['standard deviation', lambda { data[:serve_times].standard_deviation }],
    ['min', lambda { data[:serve_times].first }],
    ['quartile 1', lambda { data[:serve_times].quartile(1) }],
    ['quartile 2 (median)', lambda { data[:serve_times].median }],
    ['quartile 3', lambda { data[:serve_times].quartile(3) }],
    ['max', lambda { data[:serve_times].last }]
  ]
  rows.each { |label, compute| puts render.call(label, compute.call) }

  puts blank
end
# Command-line entry point: declare the options with Slop, then run the
# analyzer on the log file named on the command line.
# NOTE(review): the meaning of :strict and :optional depends on the installed
# Slop version - presumably unknown options are rejected and each option
# requires an argument; confirm against the gem's documentation.
opts = Slop.parse(:strict => true) do
# This banner sits inside the block handed to Slop.parse, not in an option handler.
puts "Apache log analyzer (by przemek@sobstel.org), ver 0.1.0 (2011-06-10)\n\n"
on :u, :url, 'Exact URL to match', :optional => false
on :p, :urlpart, 'URL part to match', :optional => false
on :f, :from, 'Datetime from', :optional => false
on :t, :to, 'Datetime to', :optional => false
# The help handler prints the usage text and exits before any analysis starts.
on :h, :help, 'Help' do
puts "Usage: analyzer.rb [options] logfile"
puts "Options:"
puts help
exit
end
end
# Analyzer#analyze reads the filters under String keys ('url', 'from', ...).
# NOTE(review): this assumes opts.to_hash returns String keys - verify for the
# Slop version in use.
args = opts.to_hash
# The log file is taken to be the last command-line word.
# NOTE(review): if no log file is given, the last word may be an option value.
args[:logfile] = ARGV.last
args[:formatter] = text_formatter
Analyzer.new.analyze(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.