Skip to content

Instantly share code, notes, and snippets.

@danhealy
Last active January 2, 2019 18:05
Show Gist options
  • Save danhealy/09074d83b2d4e5db2d51087ce6c7975a to your computer and use it in GitHub Desktop.
Save danhealy/09074d83b2d4e5db2d51087ce6c7975a to your computer and use it in GitHub Desktop.
Given a log file, find the top consecutive visiting patterns

Code Sample - Log Parsing

I created this code sample in December 2018 for an interview. The task is to parse a log file whose lines have the following format:

<#>, C<#>, P<#>

Where the first number is a "timestamp," the second is a Client/Customer ID #, and the third is a Page #. The entry represents a customer visiting a specific page at a specific time.

It's assumed that the log file contains timestamps in ascending order.

After parsing the file, the sample analyzes sequences of page views. For example, in the default case, it shows the top 5 sequences of 3 consecutive page views by a single customer. The top sequence in input_example.txt is page 3 to page 1 to page 10, which occurs 3 times.

# Usage:
# bundle exec ruby 0_log_parser_example.rb <filename> <sequence_length> <show_top>
# Sequence Length and Show Top are clamped to the range 1 to 20; they default to 3 and 5.
require 'bundler'
Bundler.require
# Holds one customer's identifier together with the list of pages they visited.
class Customer
  # Identifier of the customer; nil until a caller assigns it.
  attr_accessor :id
  # Array of visited page IDs; starts out empty.
  attr_accessor :visits

  # Builds a customer with no ID and an empty visit list.
  def initialize
    self.visits = []
  end
end
# ---------------
# Plain record for a single log line. Every field is nil until assigned.
class LogEntry
  # Timestamp value taken from the log line.
  attr_accessor :timestamp
  # Numeric ID of the customer who made the visit.
  attr_accessor :customer_id
  # Numeric ID of the page that was visited.
  attr_accessor :page_id
end
# ---------------
# Parses log lines of the form "<timestamp>, C<customer>, P<page>", groups the
# visited pages per customer, and counts how often each run of consecutive
# page visits occurs.
class LogParser
  # customers:      Hash of customer ID => Customer.
  # visit_patterns: Hash of pattern string (e.g. "1-2-3") => occurrence count;
  #                 missing keys read as 0.
  # visits:         never assigned by this class (reads as nil); the accessor is
  #                 kept only so existing callers keep working.
  attr_accessor :customers, :visits, :visit_patterns

  # Starts with no customers and an empty pattern tally.
  def initialize
    @customers = {}
    @visit_patterns = Hash.new(0)
  end

  # Reads every line of +filename+ and records the visit it describes.
  #
  # Blank or whitespace-only lines (for example a trailing empty line) are
  # skipped instead of crashing the parser.
  #
  # Raises ArgumentError (from parse_line) for a non-blank malformed line.
  def parse_file(filename)
    File.readlines(filename).each do |line|
      next if line.strip.empty?

      mark_visit(parse_line(line))
    end
  end

  # Tallies every run of +pattern_length+ consecutive page visits made by the
  # same customer into visit_patterns, keyed by the page IDs joined with "-".
  #
  # Counts are added to the existing tally, so calling this twice on the same
  # data doubles the counts. Returns the customers hash.
  def analyze_page_visits(pattern_length = 3)
    @customers.each_value do |customer|
      customer.visits.each_cons(pattern_length) do |cons|
        @visit_patterns[cons.join("-")] += 1
      end
    end
  end

  # Prints the +top+ most frequent patterns, most frequent first, one per line
  # in the form "<pattern>: <count> times".
  def print_analysis(top = 5)
    ranked = @visit_patterns.sort_by { |_pattern, count| -count }
    ranked.first(top).each do |pattern, count|
      puts "#{pattern}: #{count} times"
    end
  end

  # Converts one log line into a LogEntry.
  #
  # Fields are separated by commas with optional surrounding whitespace; the
  # "C" and "P" prefixes are stripped before converting to integers. Fields
  # beyond the third are ignored.
  #
  # Raises ArgumentError when the line has fewer than three fields.
  def parse_line(line)
    fields = line.strip.split(/\s*,\s*/)
    raise ArgumentError, "malformed log line: #{line.inspect}" if fields.length < 3

    LogEntry.new.tap do |entry|
      entry.timestamp = fields[0].to_i
      entry.customer_id = fields[1].delete("C").to_i
      entry.page_id = fields[2].delete("P").to_i
    end
  end

  # Appends the entry's page to its customer's visit list, creating the
  # Customer the first time that customer ID is seen.
  def mark_visit(entry)
    customer = (@customers[entry.customer_id] ||= Customer.new.tap do |c|
      c.id = entry.customer_id
    end)
    customer.visits << entry.page_id
  end
end
# ---------------
# Command-line entry point; runs only when this file is executed directly.
if __FILE__ == $PROGRAM_NAME
  # Turns an optional argument into an integer limited to 1..20. A missing
  # argument uses +fallback+; non-numeric text becomes 0 via to_i and is then
  # raised to 1.
  bounded = lambda do |raw, fallback|
    number = (raw || fallback).to_i
    [[number, 1].max, 20].min
  end

  filename = ARGV.fetch(0, "input_example.txt")
  sequence_length = bounded.call(ARGV[1], 3)
  show_top = bounded.call(ARGV[2], 5)

  lp = LogParser.new
  lp.parse_file(filename)
  # Print only when the analysis step returned a truthy value.
  lp.print_analysis(show_top) if lp.analyze_page_visits(sequence_length)
end
# ---------------
# Specs for LogParser: line parsing, file ingestion, visit bookkeeping,
# pattern counting and report output.
RSpec.describe LogParser do
  let(:parser) { LogParser.new }
  # Entry equivalent to the log line "1, C2, P3".
  let(:ex_entry) do
    LogEntry.new.tap do |e|
      e.timestamp = 1
      e.customer_id = 2
      e.page_id = 3
    end
  end
  # Customer with five visits, which yields three 3-page patterns.
  let(:ex_customer) do
    Customer.new.tap do |c|
      c.id = 1
      c.visits = [1, 2, 3, 4, 5]
    end
  end

  describe "parse_line" do
    let(:line) { "1, C2, P3" }

    it "should return a LogEntry" do
      entry = parser.send(:parse_line, line)
      expect(entry).to be_a LogEntry
    end

    it "should mark the correct timestamp" do
      entry = parser.send(:parse_line, line)
      expect(entry.timestamp).to eq ex_entry.timestamp
    end

    it "should mark the correct customer ID" do
      entry = parser.send(:parse_line, line)
      expect(entry.customer_id).to eq ex_entry.customer_id
    end

    it "should mark the correct page ID" do
      entry = parser.send(:parse_line, line)
      # eq (value equality) rather than be (object identity).
      expect(entry.page_id).to eq ex_entry.page_id
    end
  end

  describe "parse_file" do
    context "with a single log line" do
      let(:expected_input) do
        <<~INPUT
          1, C2, P3
        INPUT
      end

      before do
        # File.readlines returns an Array of lines, so the stub does too.
        allow(File).to receive(:readlines).and_return(expected_input.lines)
      end

      it "should create a customer object" do
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers.keys.length }.from(0).to(1)
      end
    end

    context "with a small log file" do
      let(:expected_input) do
        <<~INPUT
          1, C1, P1
          2, C1, P2
          3, C3, P3
        INPUT
      end

      before do
        # File.readlines returns an Array of lines, so the stub does too.
        allow(File).to receive(:readlines).and_return(expected_input.lines)
      end

      it "should create two Customer objects" do
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers.keys.length }.from(0).to(2)
      end

      it "should create a Customer with ID 1 and two page visits" do
        expect do
          parser.parse_file("test.txt")
        end.to change { parser.customers[1]&.visits&.length }.from(nil).to(2)
      end
    end
  end

  describe "mark_visit" do
    it "should populate @customers with a Customer object" do
      expect do
        parser.mark_visit(ex_entry)
      end.to change {
        parser.customers[ex_entry.customer_id]
      }.from(nil).to(be_an_instance_of(Customer))
    end

    it "should add to the Customer's @visits" do
      parser.mark_visit(ex_entry)
      expect(parser.customers[ex_entry.customer_id].visits).to eq([ex_entry.page_id])
    end
  end

  describe "analyze_page_visits" do
    before do
      parser.customers[ex_customer.id] = ex_customer
    end

    it "should populate visit_patterns with counts for the consecutive visits requested" do
      expect do
        parser.analyze_page_visits(3)
      end.to change { parser.visit_patterns["1-2-3"] }.from(0).to(1)
    end
  end

  describe "print_analysis" do
    # Three patterns with distinct counts so the ranking is unambiguous.
    before do
      parser.visit_patterns["1-2-3"] = 5
      parser.visit_patterns["2-3-4"] = 4
      parser.visit_patterns["4-5-6"] = 3
    end

    context "explicitly asking for 1 pattern" do
      it "should print only the top pattern" do
        expect { parser.print_analysis(1) }.to output("1-2-3: 5 times\n").to_stdout
      end
    end

    context "explicitly asking for the top two patterns" do
      it "should print the second top pattern" do
        expect { parser.print_analysis(2) }.to output(/2\-3\-4: 4 times/).to_stdout
      end

      it "should not print the bottom pattern" do
        expect { parser.print_analysis(2) }.not_to output(/4\-5\-6: 3 times/).to_stdout
      end
    end
  end
end
# Writes input_example.txt: 500 log lines whose timestamps run from 0 to 499,
# each with a random customer (1-12) and a random page.
puts "Create log file"
File.open("input_example.txt", "w+") do |f|
  (0...500).each do |i|
    # A coin flip picks the page range: 1-10 on zero, 1-20 otherwise, so the
    # low-numbered pages are drawn more often overall.
    page_span = rand(2).zero? ? 10 : 20
    page = rand(page_span) + 1
    c = rand(12) + 1
    f.puts("#{i}, C#{c}, P#{page}")
  end
end
puts "Done"
0, C5, P3
1, C6, P5
2, C8, P19
3, C8, P2
4, C4, P3
5, C11, P5
6, C6, P2
7, C6, P6
8, C2, P2
9, C2, P9
10, C10, P10
11, C8, P5
12, C10, P10
13, C11, P8
14, C6, P1
15, C6, P3
16, C8, P2
17, C7, P7
18, C8, P2
19, C12, P2
20, C3, P6
21, C8, P14
22, C10, P5
23, C6, P7
24, C1, P2
25, C4, P2
26, C11, P11
27, C12, P5
28, C1, P4
29, C6, P5
30, C6, P1
31, C1, P1
32, C11, P7
33, C9, P10
34, C9, P1
35, C7, P10
36, C5, P9
37, C3, P3
38, C2, P9
39, C5, P3
40, C4, P12
41, C8, P3
42, C10, P6
43, C5, P17
44, C4, P8
45, C3, P1
46, C12, P8
47, C4, P2
48, C2, P2
49, C3, P10
50, C11, P4
51, C12, P7
52, C3, P3
53, C12, P4
54, C12, P7
55, C6, P9
56, C3, P7
57, C3, P9
58, C7, P4
59, C1, P2
60, C3, P6
61, C11, P2
62, C1, P20
63, C11, P7
64, C6, P9
65, C2, P10
66, C1, P7
67, C3, P9
68, C10, P17
69, C9, P6
70, C6, P13
71, C2, P9
72, C11, P3
73, C11, P8
74, C8, P7
75, C2, P9
76, C7, P5
77, C7, P13
78, C9, P4
79, C4, P14
80, C3, P9
81, C9, P8
82, C2, P14
83, C4, P8
84, C10, P4
85, C12, P8
86, C7, P4
87, C3, P14
88, C10, P3
89, C11, P2
90, C11, P4
91, C5, P9
92, C5, P16
93, C4, P5
94, C3, P10
95, C10, P17
96, C7, P5
97, C11, P5
98, C9, P14
99, C6, P3
100, C12, P2
101, C7, P2
102, C8, P8
103, C2, P5
104, C6, P5
105, C10, P6
106, C12, P13
107, C4, P15
108, C8, P2
109, C4, P8
110, C12, P9
111, C11, P1
112, C8, P7
113, C3, P5
114, C8, P3
115, C8, P9
116, C11, P1
117, C5, P17
118, C1, P3
119, C10, P3
120, C8, P7
121, C1, P17
122, C12, P3
123, C2, P7
124, C5, P3
125, C4, P8
126, C11, P6
127, C6, P4
128, C5, P7
129, C6, P6
130, C2, P2
131, C11, P10
132, C6, P9
133, C9, P6
134, C6, P20
135, C8, P7
136, C11, P4
137, C10, P9
138, C1, P7
139, C2, P9
140, C11, P13
141, C2, P6
142, C6, P10
143, C5, P4
144, C2, P10
145, C10, P4
146, C4, P19
147, C8, P8
148, C9, P15
149, C5, P7
150, C7, P7
151, C5, P8
152, C9, P8
153, C6, P7
154, C8, P10
155, C7, P4
156, C12, P8
157, C2, P17
158, C10, P4
159, C8, P10
160, C6, P7
161, C8, P19
162, C7, P14
163, C8, P11
164, C10, P8
165, C11, P7
166, C8, P6
167, C9, P4
168, C7, P5
169, C12, P16
170, C3, P5
171, C3, P7
172, C10, P1
173, C10, P2
174, C5, P18
175, C4, P11
176, C10, P1
177, C7, P19
178, C10, P1
179, C10, P14
180, C12, P7
181, C12, P1
182, C10, P19
183, C5, P1
184, C8, P2
185, C4, P6
186, C9, P12
187, C10, P4
188, C11, P6
189, C8, P7
190, C4, P9
191, C7, P4
192, C10, P19
193, C5, P10
194, C7, P5
195, C5, P15
196, C6, P15
197, C10, P4
198, C3, P12
199, C2, P9
200, C10, P16
201, C8, P2
202, C1, P7
203, C6, P5
204, C12, P12
205, C6, P1
206, C6, P10
207, C3, P11
208, C7, P8
209, C10, P10
210, C7, P19
211, C11, P20
212, C6, P13
213, C4, P13
214, C2, P12
215, C8, P1
216, C7, P18
217, C5, P13
218, C1, P4
219, C1, P9
220, C7, P11
221, C1, P1
222, C10, P1
223, C12, P3
224, C9, P4
225, C7, P5
226, C11, P3
227, C4, P1
228, C8, P8
229, C3, P9
230, C11, P3
231, C4, P8
232, C5, P7
233, C7, P6
234, C9, P13
235, C1, P10
236, C3, P4
237, C12, P9
238, C8, P8
239, C11, P10
240, C11, P7
241, C11, P9
242, C12, P19
243, C3, P19
244, C8, P15
245, C10, P2
246, C11, P1
247, C8, P4
248, C7, P9
249, C7, P11
250, C10, P5
251, C4, P7
252, C4, P6
253, C1, P16
254, C10, P15
255, C7, P13
256, C11, P5
257, C12, P17
258, C5, P1
259, C9, P15
260, C9, P6
261, C5, P7
262, C4, P15
263, C4, P9
264, C11, P10
265, C3, P1
266, C2, P3
267, C5, P5
268, C1, P3
269, C6, P9
270, C9, P4
271, C8, P2
272, C9, P12
273, C1, P6
274, C5, P12
275, C5, P9
276, C5, P4
277, C5, P5
278, C4, P9
279, C9, P9
280, C7, P15
281, C4, P7
282, C10, P11
283, C8, P6
284, C12, P8
285, C2, P1
286, C10, P5
287, C4, P11
288, C10, P3
289, C4, P3
290, C4, P17
291, C10, P17
292, C12, P20
293, C7, P14
294, C2, P10
295, C7, P9
296, C2, P7
297, C7, P3
298, C9, P8
299, C11, P10
300, C8, P14
301, C7, P2
302, C9, P3
303, C7, P4
304, C10, P2
305, C2, P7
306, C10, P8
307, C5, P2
308, C11, P9
309, C8, P10
310, C5, P9
311, C9, P9
312, C8, P14
313, C10, P5
314, C4, P2
315, C12, P7
316, C6, P17
317, C10, P20
318, C3, P20
319, C5, P3
320, C10, P15
321, C12, P8
322, C7, P7
323, C5, P11
324, C4, P1
325, C3, P2
326, C10, P10
327, C12, P6
328, C5, P1
329, C1, P1
330, C12, P6
331, C5, P8
332, C9, P17
333, C5, P1
334, C12, P3
335, C1, P4
336, C1, P9
337, C1, P17
338, C11, P2
339, C10, P3
340, C12, P8
341, C5, P18
342, C7, P13
343, C4, P5
344, C7, P10
345, C7, P9
346, C12, P12
347, C8, P5
348, C12, P7
349, C9, P15
350, C12, P7
351, C12, P6
352, C1, P16
353, C10, P1
354, C12, P3
355, C9, P4
356, C11, P8
357, C6, P9
358, C2, P4
359, C4, P4
360, C1, P1
361, C3, P4
362, C10, P10
363, C2, P9
364, C3, P5
365, C9, P6
366, C12, P5
367, C8, P6
368, C11, P6
369, C5, P1
370, C4, P5
371, C9, P5
372, C12, P3
373, C9, P2
374, C4, P1
375, C8, P9
376, C12, P13
377, C3, P3
378, C3, P5
379, C7, P6
380, C12, P7
381, C8, P19
382, C10, P7
383, C7, P9
384, C3, P10
385, C8, P7
386, C8, P9
387, C2, P3
388, C8, P1
389, C12, P17
390, C1, P3
391, C10, P3
392, C10, P13
393, C5, P5
394, C10, P19
395, C7, P4
396, C7, P2
397, C12, P13
398, C2, P9
399, C4, P3
400, C1, P3
401, C3, P8
402, C10, P4
403, C5, P19
404, C10, P3
405, C8, P13
406, C8, P7
407, C4, P1
408, C12, P3
409, C6, P16
410, C4, P7
411, C9, P10
412, C3, P9
413, C4, P15
414, C6, P17
415, C7, P18
416, C1, P1
417, C4, P1
418, C4, P8
419, C8, P7
420, C7, P11
421, C9, P10
422, C12, P19
423, C6, P4
424, C5, P8
425, C2, P7
426, C1, P5
427, C7, P3
428, C9, P7
429, C5, P6
430, C7, P18
431, C1, P9
432, C12, P10
433, C4, P5
434, C2, P5
435, C6, P1
436, C3, P9
437, C5, P20
438, C9, P10
439, C8, P5
440, C12, P2
441, C1, P10
442, C9, P10
443, C11, P1
444, C11, P4
445, C12, P20
446, C3, P20
447, C6, P3
448, C6, P7
449, C5, P11
450, C7, P17
451, C5, P7
452, C7, P8
453, C5, P10
454, C11, P19
455, C3, P2
456, C7, P8
457, C4, P7
458, C5, P3
459, C5, P2
460, C7, P10
461, C11, P7
462, C5, P8
463, C11, P18
464, C7, P17
465, C10, P2
466, C6, P10
467, C2, P5
468, C11, P4
469, C10, P3
470, C6, P1
471, C5, P8
472, C3, P16
473, C7, P13
474, C9, P9
475, C2, P6
476, C10, P6
477, C3, P18
478, C9, P18
479, C12, P1
480, C4, P5
481, C7, P8
482, C8, P2
483, C9, P10
484, C6, P6
485, C1, P8
486, C12, P8
487, C12, P9
488, C5, P8
489, C6, P7
490, C1, P14
491, C9, P5
492, C9, P10
493, C5, P20
494, C8, P9
495, C1, P1
496, C5, P8
497, C4, P5
498, C10, P6
499, C10, P9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment