Skip to content

Instantly share code, notes, and snippets.

@istana
Last active November 8, 2016 17:57
Show Gist options
  • Save istana/de7381f60ac0e398d9c5d9a3d05509aa to your computer and use it in GitHub Desktop.
Save istana/de7381f60ac0e398d9c5d9a3d05509aa to your computer and use it in GitHub Desktop.
messy stuff
# This script goes through a whole Apache log file and creates an HTTP request log record for each line.
# It handles the common and combined log formats.
# can handle duplicates and can handle different log styles
# Common Log Format (CLF) - common
# "%h %l %u %t \"%r\" %>s %b"
# Common Log Format with Virtual Host
# "%v %h %l %u %t \"%r\" %>s %b"
# NCSA extended/combined log format - combined
# "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
require 'pry'
module SupportDaemons
  module Castellan
    # Reads Apache access-log text line by line and stores every request in a
    # freshly built log-line object.
    #
    # Accepted layouts: Common Log Format, Common Log Format preceded by a
    # virtual-host column, and the NCSA combined format.
    class ApacheLogCruncher
      # A double-quoted log field; Apache writes embedded quotes as \" so
      # backslash escapes are allowed inside the quotes.
      QUOTED_FIELD = '(?:[^"\\\\]|\\\\.)*'

      # One complete access-log line. An optional leading token (the virtual
      # host) is accepted and discarded; the referer and user-agent pair is
      # optional so that both common and combined lines match.
      LOG_LINE_PATTERN = /
        \A\s*
        (?:\S+\s+)?
        (?<host>\S+)\s+
        (?<logname>\S+)\s+
        (?<user>\S+)\s+
        \[(?<time_received>[^\]]+)\]\s+
        "(?<request>#{QUOTED_FIELD})"\s+
        (?<status>\d{3})\s+
        (?<size>\d+|-)
        (?:\s+"(?<referer>#{QUOTED_FIELD})"\s+"(?<user_agent>#{QUOTED_FIELD})")?
        \s*\z
      /x.freeze

      # Parses every non-blank line of +log_file_content+ into a new log-line
      # object and saves it.
      #
      # @param log_file_content [#each_line] log text (String, IO, StringIO, ...)
      # @param apache_log_line [#new] factory whose instances expose the
      #   attribute writers used below plus +save+
      # @return whatever +log_file_content.each_line+ returns
      # @raise [ArgumentError] when a non-blank line is not a recognised log line
      def self.call(log_file_content:, apache_log_line:)
        log_file_content.each_line do |line|
          next if line.strip.empty?

          parse_and_fill_log_line_object(line, apache_log_line.new)
        end
      end

      # Copies the fields of a single log line onto +line_object+ and saves it.
      #
      # NOTE: the original file placed a bare `private` above this method, but
      # that keyword does not apply to `def self.` methods, so the method has
      # always been publicly callable and is kept that way.
      #
      # @param line [String] one access-log line
      # @param line_object [Object] receiver of the parsed attributes
      # @return the result of +line_object.save+
      # @raise [ArgumentError] when +line+ does not match a known layout
      def self.parse_and_fill_log_line_object(line, line_object)
        match = LOG_LINE_PATTERN.match(line)
        raise ArgumentError, "unparseable Apache log line: #{line.inspect}" unless match

        line_object.host = match[:host]
        line_object.logname = match[:logname]
        line_object.user = match[:user]
        line_object.time_received = match[:time_received]

        http_method, http_uri, http_protocol = split_request(match[:request])
        line_object.http_method = http_method
        line_object.http_uri = http_uri
        line_object.http_protocol = http_protocol
        # Older line objects may expose the raw request line; fill it only when supported.
        line_object.http_request = match[:request] if line_object.respond_to?(:http_request=)

        line_object.http_status = Integer(match[:status], 10)
        # Apache's %b prints "-" instead of 0 when no body bytes were sent.
        line_object.response_size = match[:size] == '-' ? 0 : Integer(match[:size], 10)

        # Both stay nil for lines without the combined-format trailer.
        line_object.referer = match[:referer]
        line_object.user_agent = match[:user_agent]
        line_object.save
      end

      # Splits a request line such as "GET /path HTTP/1.1" into
      # [method, uri, protocol]; parts that are absent come back as nil.
      def self.split_request(request)
        http_method, remainder = request.split(' ', 2)
        return [http_method, nil, nil] unless remainder

        # The protocol is the last token, so a URI containing spaces stays intact.
        uri, _separator, protocol = remainder.rpartition(' ')
        uri.empty? ? [http_method, remainder, nil] : [http_method, uri, protocol]
      end
      private_class_method :split_request
    end
  end
end
require_relative '../apache_log_cruncher'
# Plain attribute holder standing in for a stored log record; `save` is a stub.
class ApacheLogLine
  # Who made the request.
  # logname is Apache's remote_logname - it comes from identd - reverse DNS
  # lookup from the IP address (disabled by default).
  attr_accessor :host
  attr_accessor :logname
  attr_accessor :user

  # When the request arrived.
  attr_accessor :time_received

  # Pieces of the request line and the resulting status.
  attr_accessor :http_method
  attr_accessor :http_uri
  attr_accessor :http_protocol
  attr_accessor :http_status

  # Response size and the client-supplied headers.
  attr_accessor :response_size
  attr_accessor :referer
  attr_accessor :user_agent

  # Nothing is written anywhere; success is always reported.
  def save
    true
  end
end
# Exercises ApacheLogCruncher against Common Log Format fixtures, using
# ApacheLogLine as the record class.
describe SupportDaemons::Castellan::ApacheLogCruncher do
  # Four Common Log Format lines; no example below references this fixture yet.
  let(:apache_log) { StringIO.new(<<-STR
64.242.88.10 - - [07/Mar/2004:17:09:01 -0800] "GET /twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z] HTTP/1.1" 200 4284
64.242.88.10 - - [07/Mar/2004:17:10:20 -0800] "GET /twiki/bin/oops/TWiki/TextFormattingRules?template=oopsmore&param1=1.37&param2=1.37 HTTP/1.1" 200 11400
64.242.88.10 - - [07/Mar/2004:17:13:50 -0800] "GET /twiki/bin/edit/TWiki/DefaultPlugin?t=1078688936 HTTP/1.1" 401 12846
64.242.88.10 - - [07/Mar/2004:17:16:00 -0800] "GET /twiki/bin/search/Main/?scope=topic&regex=on&search=^g HTTP/1.1" 200 3675
  STR
  ) }

  # A single Common Log Format line, so every expectation refers to one record.
  let(:apache_log_one_line) { StringIO.new(<<-STR
64.242.88.10 - - [07/Mar/2004:17:09:01 -0800] "GET /twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z] HTTP/1.1" 200 4284
  STR
  ) }

  let(:log_line) { ApacheLogLine.new }

  # Make the cruncher fill the instance the example inspects.
  before do
    allow(ApacheLogLine).to receive(:new).and_return(log_line)
  end

  it 'fills log line with correct data' do
    SupportDaemons::Castellan::ApacheLogCruncher.call(log_file_content: apache_log_one_line, apache_log_line: ApacheLogLine)

    aggregate_failures do
      expect(log_line.host).to eq('64.242.88.10')
      # ApacheLogLine exposes the remote logname as `logname`.
      expect(log_line.logname).to eq('-')
      expect(log_line.user).to eq('-')
      # TODO make it ISO8601
      expect(log_line.time_received).to eq('07/Mar/2004:17:09:01 -0800')
      expect(log_line.http_method).to eq('GET')
      expect(log_line.http_uri).to eq('/twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z]')
      expect(log_line.http_protocol).to eq('HTTP/1.1')
      # final http status after redirects (if allowed in Apache)
      expect(log_line.http_status).to eq(200)
      expect(log_line.response_size).to eq(4284)
      # The fixture line is Common Log Format, which has no referer or user-agent columns.
      expect(log_line.referer).to be_nil
      expect(log_line.user_agent).to be_nil
    end
  end
end
@erich
Copy link

erich commented Nov 8, 2016

LGTM :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment