|
# Usage: |
|
# bundle exec ruby 0_log_parser_example.rb <filename> <sequence_length> <show_top> |
|
# Sequence length and show-top are optional: they default to 3 and 5, and values outside 1..20 are clamped to that range.
|
|
|
require 'bundler' |
|
Bundler.require |
|
|
|
# A customer together with the list of visits recorded for them.
class Customer
  # Identifier of the customer; nil until a caller assigns it.
  attr_accessor :id

  # Array of recorded visits; a fresh empty array for every new instance.
  attr_accessor :visits

  def initialize
    self.visits = []
  end
end
|
|
|
# --------------- |
|
|
|
# Plain value holder for the three fields of one log record.
# Every field is nil until it is assigned.
class LogEntry
  attr_accessor :timestamp
  attr_accessor :customer_id
  attr_accessor :page_id
end
|
|
|
# --------------- |
|
|
|
# Parses a page-visit log and counts how often each run of consecutive page
# visits occurs across customers.
#
# A log line is expected to look like "<timestamp>, C<customer>, P<page>",
# for example "1, C2, P3".
class LogParser
  # customers      - Hash mapping customer id => Customer.
  # visit_patterns - Hash mapping "page-page-..." => count (missing keys read as 0).
  # visits         - never assigned inside this class; the accessor is kept
  #                  only so existing callers do not break.
  attr_accessor :customers, :visits, :visit_patterns

  def initialize
    @customers = {}
    @visit_patterns = Hash.new(0)
  end

  # Reads +filename+ and records every visit it contains.
  #
  # Blank (whitespace-only) lines are skipped instead of crashing the parse.
  # Raises ArgumentError for a non-blank line with fewer than three fields;
  # errors raised by File.readlines propagate unchanged.
  def parse_file(filename)
    File.readlines(filename).each do |line|
      next if line.strip.empty?

      mark_visit(parse_line(line))
    end
  end

  # Counts every run of +pattern_length+ consecutive visits of each customer
  # into visit_patterns, keyed by the visited page ids joined with "-".
  #
  # Counts accumulate across calls; nothing is reset here.
  # Returns the customers hash (always truthy), which callers may chain on.
  def analyze_page_visits(pattern_length = 3)
    @customers.each_value do |customer|
      customer.visits.each_cons(pattern_length) do |run|
        @visit_patterns[run.join("-")] += 1
      end
    end
  end

  # Prints the +top+ most frequent patterns, one "<pattern>: <count> times"
  # line each, most frequent first.
  #
  # Ties are broken by pattern string so the output order is well defined
  # (sort_by on the count alone is not a stable sort).
  # Returns the printed [pattern, count] pairs.
  def print_analysis(top = 5)
    ranked = @visit_patterns.sort_by { |pattern, count| [-count, pattern] }
    ranked.take(top).each do |pattern, count|
      puts "#{pattern}: #{count} times"
    end
  end

  # Converts one log line into a LogEntry.
  #
  # Fields are comma-separated; whitespace around the commas and a trailing
  # newline are ignored. Every "C" is removed from the customer field and
  # every "P" from the page field before the integer conversion. String#to_i
  # is used, so a non-numeric field becomes 0.
  #
  # Raises ArgumentError when the line has fewer than three fields.
  def parse_line(line)
    timestamp, customer, page = line.strip.split(/\s*,\s*/)
    raise ArgumentError, "malformed log line: #{line.inspect}" if page.nil?

    LogEntry.new.tap do |entry|
      entry.timestamp = timestamp.to_i
      entry.customer_id = customer.delete("C").to_i
      entry.page_id = page.delete("P").to_i
    end
  end

  # Appends the entry's page id to the visits of the entry's customer,
  # creating and registering the Customer on first sight.
  # Returns that customer's visits array.
  def mark_visit(entry)
    customer = (@customers[entry.customer_id] ||= Customer.new.tap { |c| c.id = entry.customer_id })
    customer.visits << entry.page_id
  end
end
|
|
|
# --------------- |
|
|
|
# Command-line entry point; skipped when the file is loaded rather than run.
#
# NOTE: locals defined here are visible inside blocks further down the file,
# so their names are chosen not to clash with helper names used there.
if __FILE__ == $0
  log_path = ARGV.fetch(0, "input_example.txt")

  # Missing arguments fall back to 3 and 5; everything is clamped to 1..20.
  pattern_length = ARGV.fetch(1, 3).to_i.clamp(1, 20)
  top_count = ARGV.fetch(2, 5).to_i.clamp(1, 20)

  log_parser = LogParser.new
  log_parser.parse_file(log_path)

  # Print only when the analysis step returns a truthy value.
  log_parser.print_analysis(top_count) if log_parser.analyze_page_visits(pattern_length)
end
|
|
|
# --------------- |
|
|
|
# Specs for LogParser. RSpec must already be loaded when this file is
# evaluated.
RSpec.describe LogParser do
  let(:parser) { LogParser.new }

  # Entry equivalent to the sample line "1, C2, P3".
  let(:ex_entry) do
    LogEntry.new.tap do |e|
      e.timestamp = 1
      e.customer_id = 2
      e.page_id = 3
    end
  end

  # Customer with five visits, i.e. three windows of length 3.
  let(:ex_customer) do
    Customer.new.tap do |c|
      c.id = 1
      c.visits = [1, 2, 3, 4, 5]
    end
  end

  describe "parse_line" do
    let(:line) { "1, C2, P3" }

    it "should return a LogEntry" do
      entry = parser.send(:parse_line, line)
      expect(entry).to be_a LogEntry
    end

    it "should mark the correct timestamp" do
      entry = parser.send(:parse_line, line)
      expect(entry.timestamp).to eq ex_entry.timestamp
    end

    it "should mark the correct customer ID" do
      entry = parser.send(:parse_line, line)
      expect(entry.customer_id).to eq ex_entry.customer_id
    end

    it "should mark the correct page ID" do
      entry = parser.send(:parse_line, line)
      # Value equality, consistent with the customer ID example above;
      # `be` would test object identity.
      expect(entry.page_id).to eq ex_entry.page_id
    end
  end

  describe "parse_file" do
    context "with a single log line" do
      let(:expected_input) do
        <<~INPUT
          1, C2, P3
        INPUT
      end

      before do
        # File.readlines returns an Array of lines, so the stub does too.
        allow(File).to receive(:readlines).and_return(expected_input.lines)
      end

      it "should create a customer object" do
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers.keys.length }.from(0).to(1)
      end
    end

    context "with a small log file" do
      let(:expected_input) do
        <<~INPUT
          1, C1, P1
          2, C1, P2
          3, C3, P3
        INPUT
      end

      before do
        # File.readlines returns an Array of lines, so the stub does too.
        allow(File).to receive(:readlines).and_return(expected_input.lines)
      end

      it "should create two Customer objects" do
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers.keys.length }.from(0).to(2)
      end

      it "should create a Customer with ID 1 and two page visits" do
        # Safe navigation: the customer does not exist before parsing.
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers[1]&.visits&.length }.from(nil).to(2)
      end
    end
  end

  describe "mark_visit" do
    it "should populate @customers with a Customer object" do
      expect do
        parser.mark_visit(ex_entry)
      end.to change {
        parser.customers[ex_entry.customer_id]
      }.from(nil).to(be_an_instance_of(Customer))
    end

    it "should add to the Customer's @visits" do
      parser.mark_visit(ex_entry)
      expect(parser.customers[ex_entry.customer_id].visits).to eq([ex_entry.page_id])
    end
  end

  describe "analyze_page_visits" do
    before do
      parser.customers[ex_customer.id] = ex_customer
    end

    it "should populate visit_patterns with counts for the consecutive visits requested" do
      expect do
        parser.analyze_page_visits(3)
      end.to change { parser.visit_patterns["1-2-3"] }.from(0).to(1)
    end
  end

  describe "print_analysis" do
    before do
      parser.visit_patterns["1-2-3"] = 5
      parser.visit_patterns["2-3-4"] = 4
      parser.visit_patterns["4-5-6"] = 3
    end

    context "explicitly asking for 1 pattern" do
      it "should print only the top pattern" do
        expect { parser.print_analysis(1) }.to output("1-2-3: 5 times\n").to_stdout
      end
    end

    context "explicitly asking for the top two patterns" do
      it "should print the second top pattern" do
        expect { parser.print_analysis(2) }.to output(/2\-3\-4: 4 times/).to_stdout
      end

      it "should not print the bottom pattern" do
        expect { parser.print_analysis(2) }.not_to output(/4\-5\-6: 3 times/).to_stdout
      end
    end
  end
end