Last active
November 8, 2016 17:57
-
-
Save istana/de7381f60ac0e398d9c5d9a3d05509aa to your computer and use it in GitHub Desktop.
messy stuff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script goes through a whole Apache access log file and creates one
# HTTP request log record per line.
# It handles the common and combined log formats.
# It can handle duplicates and can handle different log styles.
# Common Log Format (CLF) - common
# "%h %l %u %t \"%r\" %>s %b"
# Common Log Format with Virtual Host
# "%v %h %l %u %t \"%r\" %>s %b"
# NCSA extended/combined log format - combined
# "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
require 'pry' | |
module SupportDaemons
  module Castellan
    # Parses Apache access-log content and stores one record per log line.
    #
    # Recognised layouts (the LogFormat strings listed at the top of this file):
    # Common Log Format, Common Log Format with a leading virtual host, and the
    # NCSA extended/combined format.
    class ApacheLogCruncher
      # One access-log line. The virtual host and the referer/user-agent pair
      # are optional so that all three layouts match a single pattern.
      # Quoted fields may contain backslash-escaped characters (Apache writes an
      # embedded double quote as \"); they are captured verbatim, still escaped.
      LINE_PATTERN = /
        \A
        (?:(?<vhost>\S+)\ )?
        (?<host>\S+)\ 
        (?<logname>\S+)\ 
        (?<user>\S+)\ 
        \[(?<time_received>[^\]]+)\]\ 
        "(?<http_request>(?:\\.|[^"\\])*)"\ 
        (?<http_status>\d{3}|-)\ 
        (?<response_size>\d+|-)
        (?:\ "(?<referer>(?:\\.|[^"\\])*)"\ "(?<user_agent>(?:\\.|[^"\\])*)")?
        \z
      /x.freeze

      # Request line ("%r"): method, URI and an optional protocol version.
      REQUEST_PATTERN = %r{\A(?<http_method>\S+) (?<http_uri>.*?)(?: (?<http_protocol>HTTP/\d+(?:\.\d+)?))?\z}.freeze

      # Walks the log content line by line and saves one record per non-blank line.
      #
      # @param log_file_content [#each_line] the log text (String, StringIO, File, ...)
      # @param apache_log_line [#new] class whose instances receive the parsed
      #   fields through attribute writers and respond to +save+
      # @return [Object] whatever +log_file_content.each_line+ returns
      # @raise [ArgumentError] when a non-blank line matches none of the supported formats
      def self.call(log_file_content:, apache_log_line:)
        log_file_content.each_line do |line|
          # Blank lines carry no record; skip them before building an object.
          next if line.strip.empty?

          parse_and_fill_log_line_object(line, apache_log_line.new)
        end
      end

      # Parses a single log line, copies its fields onto +line_object+ and saves it.
      #
      # @param line [String] one raw log line (surrounding whitespace is ignored)
      # @param line_object [Object] record to fill
      # @return [Object] the result of +line_object.save+
      # @raise [ArgumentError] when the line matches none of the supported formats
      def self.parse_and_fill_log_line_object(line, line_object)
        fields = LINE_PATTERN.match(line.strip)
        raise ArgumentError, "unrecognised Apache log line: #{line.inspect}" unless fields

        line_object.host = fields[:host]
        line_object.logname = fields[:logname]
        line_object.user = fields[:user]
        line_object.time_received = fields[:time_received]
        # "-" means no status was logged.
        line_object.http_status = fields[:http_status] == '-' ? nil : fields[:http_status].to_i
        # %b logs "-" instead of 0 when no body bytes were sent; "-".to_i is 0.
        line_object.response_size = fields[:response_size].to_i
        # Both stay nil for the common formats, which do not log them.
        line_object.referer = fields[:referer]
        line_object.user_agent = fields[:user_agent]
        assign_request(line_object, fields[:http_request])
        line_object.save
      end

      # Stores the raw request line and its method/URI/protocol parts on
      # +line_object+. Each attribute is written only when the object exposes
      # the matching writer, so records that keep just the raw request and
      # records that keep just the split parts are both supported.
      def self.assign_request(line_object, request)
        # A request such as "-" has no method/URI structure; the parts stay nil.
        parts = REQUEST_PATTERN.match(request)
        values = {
          http_request: request,
          http_method: parts && parts[:http_method],
          http_uri: parts && parts[:http_uri],
          http_protocol: parts && parts[:http_protocol]
        }
        values.each do |name, value|
          writer = "#{name}="
          line_object.public_send(writer, value) if line_object.respond_to?(writer)
        end
      end

      # A bare `private` does not apply to `def self.` methods, so the helpers
      # are hidden explicitly.
      private_class_method :parse_and_fill_log_line_object, :assign_request
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative '../apache_log_cruncher' | |
# In-memory stand-in for a log-line record: plain attribute accessors plus a
# +save+ that always succeeds.
class ApacheLogLine
  # logname is the remote logname (%l) supplied by identd on the client;
  # Apache logs "-" unless identity checking is enabled (disabled by default).
  # http_request holds the raw request line, which the cruncher assigns
  # alongside the split method/URI/protocol attributes.
  attr_accessor :host, :logname, :user, :time_received,
                :http_request, :http_method, :http_uri, :http_protocol,
                :http_status, :response_size, :referer, :user_agent

  # Pretends to persist the record.
  #
  # @return [true] always
  def save
    true
  end
end
describe SupportDaemons::Castellan::ApacheLogCruncher do
  # Four Common Log Format lines. The heredoc content is kept flush-left so the
  # log text contains no leading indentation.
  let(:apache_log) do
    StringIO.new(<<-STR)
64.242.88.10 - - [07/Mar/2004:17:09:01 -0800] "GET /twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z] HTTP/1.1" 200 4284
64.242.88.10 - - [07/Mar/2004:17:10:20 -0800] "GET /twiki/bin/oops/TWiki/TextFormattingRules?template=oopsmore&param1=1.37&param2=1.37 HTTP/1.1" 200 11400
64.242.88.10 - - [07/Mar/2004:17:13:50 -0800] "GET /twiki/bin/edit/TWiki/DefaultPlugin?t=1078688936 HTTP/1.1" 401 12846
64.242.88.10 - - [07/Mar/2004:17:16:00 -0800] "GET /twiki/bin/search/Main/?scope=topic&regex=on&search=^g HTTP/1.1" 200 3675
    STR
  end

  # A single Common Log Format line, used to check every parsed field.
  let(:apache_log_one_line) do
    StringIO.new(<<-STR)
64.242.88.10 - - [07/Mar/2004:17:09:01 -0800] "GET /twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z] HTTP/1.1" 200 4284
    STR
  end

  let(:log_line) { ApacheLogLine.new }

  before do
    # Hand the cruncher a known instance so the examples can inspect it.
    allow(ApacheLogLine).to receive(:new).and_return(log_line)
  end

  it 'fills log line with correct data' do
    SupportDaemons::Castellan::ApacheLogCruncher.call(log_file_content: apache_log_one_line, apache_log_line: ApacheLogLine)

    aggregate_failures do
      expect(log_line.host).to eq('64.242.88.10')
      expect(log_line.logname).to eq('-')
      expect(log_line.user).to eq('-')
      # TODO make it ISO8601
      expect(log_line.time_received).to eq('07/Mar/2004:17:09:01 -0800')
      expect(log_line.http_method).to eq('GET')
      expect(log_line.http_uri).to eq('/twiki/bin/search/Main/SearchResult?scope=text&regex=on&search=Joris%20*Benschop[^A-Za-z]')
      expect(log_line.http_protocol).to eq('HTTP/1.1')
      # final http status after redirects (if allowed in Apache)
      expect(log_line.http_status).to eq(200)
      expect(log_line.response_size).to eq(4284)
      # Common Log Format carries neither a referer nor a user agent.
      expect(log_line.referer).to be_nil
      expect(log_line.user_agent).to be_nil
    end
  end

  it 'builds one log line object per line of the log' do
    SupportDaemons::Castellan::ApacheLogCruncher.call(log_file_content: apache_log, apache_log_line: ApacheLogLine)

    expect(ApacheLogLine).to have_received(:new).exactly(4).times
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
LGTM :)