Skip to content

Instantly share code, notes, and snippets.

@apeiros
Last active February 16, 2018 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save apeiros/e5252a391d5ae6de90bc9b3f06496417 to your computer and use it in GitHub Desktop.
Save apeiros/e5252a391d5ae6de90bc9b3f06496417 to your computer and use it in GitHub Desktop.
Extract pieces from line-based logfiles
# WARNING: lots of edge cases are not properly handled. This is just code I wrote to inspect some
# Rails logs by hand in pry, where I could easily deal with edge cases manually.
# However, feel free to fork and fix the edge cases (and tell me about it!)
#
# use like this:
# start_time = Time.local(2017,1,1)
# end_time = Time.local(2017,1,2)
# puts rails_log_within("path/to/logfile.log", start_time, end_time)
require "time" # Time.iso8601 is stdlib, not core
# Reads the first complete line that starts at or after byte offset +pos+.
#
# A line starts at offset 0 or immediately after a newline. If +pos+ points
# into the middle of a line, the rest of that line is skipped and the line
# following it is returned instead.
#
# The returned offsets are byte offsets. The end offset is exclusive: it is
# the offset of the first byte after the line (start offset + line bytesize).
#
# e.g. a File with "012\n456\n89"
#   read_line_at(file, 0)  # => [0, 4, "012\n"]
#   read_line_at(file, 1)  # => [4, 8, "456\n"]
#   read_line_at(file, 4)  # => [4, 8, "456\n"]
#   read_line_at(file, 5)  # => [8, 10, "89"]
#   read_line_at(file, 10) # => [nil, nil, nil]
#
# @param file [IO] a seekable IO; its read position is moved by this call
# @param pos [Integer] non-negative byte offset to start looking from
# @return [Array] [start_offset, end_offset, line], or [nil, nil, nil] if no
#   line starts at or after +pos+
# @raise [ArgumentError] if +pos+ is negative
def read_line_at(file, pos)
  raise ArgumentError, "pos argument must be positive" if pos < 0

  start_offset = pos
  if pos.zero?
    # Seek explicitly: the IO may have been read from before, so its current
    # position is not necessarily the start of the file.
    file.seek(0, IO::SEEK_SET)
  else
    # Look at the byte just before pos to find out whether pos is a line start.
    file.seek(pos - 1, IO::SEEK_SET)
    unless file.read(1) == "\n"
      # pos is inside a line: consume the remainder of it.
      skipped = file.gets
      return [nil, nil, nil] unless skipped

      start_offset = pos + skipped.bytesize
    end
  end

  line = file.gets
  return [nil, nil, nil] unless line

  [start_offset, start_offset + line.bytesize, line]
end
# Convenience wrapper around binary_search_line that accepts either an
# already opened IO or a path.
#
# When given a path, the file is opened in binary read mode for the duration
# of the search and closed afterwards. Keyword arguments and the block are
# forwarded unchanged to binary_search_line.
#
# @param path [IO, String] an open IO, or the path of the file to search
# @param args [Hash] keyword arguments passed on to binary_search_line
# @return whatever binary_search_line returns
def binary_search_line_in_path(path, **args, &block)
  return binary_search_line(path, **args, &block) if path.is_a?(IO)

  File.open(path, 'rb') { |file| binary_search_line(file, **args, &block) }
end
# Binary-searches a line-based file by probing byte positions and reading the line found there
# with read_line_at.
#
# The block acts as "needle", i.e. your block must return -1, 0, 1 or :next_line to indicate whether
# the needle was found. -1 means we're left of the needle, 0 means we found it, 1 means we're right
# of the needle. :next_line is to be used if the current line did not contain anything to compare
# to the needle (e.g. if you're looking for timestamps, and they're not on every line).
#
# @param file [IO] the file to search; must respond to #size unless stop_at is given
# @param start_at [Integer] initial left boundary (byte offset)
# @param stop_at [Integer, nil] initial right boundary (byte offset); defaults to file.size-1
# @return [Array<Integer>]
#   [from_offset, to_offset] as returned by read_line_at:
#   * of the line for which the block returned 0, if any;
#   * otherwise, once the search stops: the bounds remembered from before the last probe if the
#     last block result was -1, else the bounds of the last probed line;
#   * [0, 0] if the loop never runs (left boundary >= right boundary from the start).
# @raise [ArgumentError] if file is not an IO
# @raise [RuntimeError] if the block returns anything other than -1, 0, 1 or :next_line
#
# NOTE(review): an earlier version of this comment promised a nil return when no line is bigger
# than the needle; the code below has no path that returns nil - confirm intended contract.
# NOTE(review): read_line_at returns [nil, nil, nil] when no full line starts at or after the
# probed position. The block is then called with nil, and the nil offsets flow into the
# boundaries/position - that case is not handled here.
def binary_search_line(file, start_at: 0, stop_at: nil)
raise ArgumentError, "file argument must be an IO" unless file.is_a?(IO)
# position probed in the previous iteration; -1 so the first iteration always runs
previous_pos = -1
# bounds of the most recently probed line
cur_left = 0
cur_right = 0
left_boundary = start_at
right_boundary = stop_at || file.size-1
# first probe: midpoint of the search window
position = (left_boundary+right_boundary) >> 1
# last value returned by the block
rv = nil
# stop when the window is empty or the probe position did not change since the last iteration
until left_boundary >= right_boundary || position == previous_pos
previous_pos = position
# remember the bounds of the previously probed line before they get overwritten
previous_bounds = [cur_left, cur_right]
cur_left, cur_right, line = read_line_at(file, position)
case rv = yield(line)
# line is left of the needle: continue after this line
when -1 then left_boundary = cur_right; position = (left_boundary+right_boundary) >> 1
# line is right of the needle: continue before this line
when 1 then right_boundary = cur_left; position = (left_boundary+right_boundary) >> 1
when 0 then return [cur_left, cur_right]
when :next_line then position = cur_right # do not move boundaries, only position (boundaries only move when something to compare was found)
else raise "your block must return -1, 0, 1 or :next_line - got #{rv.inspect}"
end
# p position: previous_pos, cur_left: cur_left, cur_right: cur_right, left_boundary: left_boundary, right_boundary: right_boundary
end
# no exact match: pick which line's bounds to report based on the last comparison
if rv == -1
previous_bounds
else
[cur_left, cur_right]
end
end
# Extracts the part of a Rails logfile that lies between two points in time.
#
# Both times are located with binary_search_line. A line is compared by the
# first "[YYYY-MM-DDTHH:MM:SS.fraction" timestamp found in it (parsed with
# Time.iso8601); lines without such a timestamp answer :next_line.
#
# The result starts at the line found for +start_time+ and ends right before
# the line found for +end_time+ (that line itself is not included).
#
# @param path [String] path of the logfile
# @param start_time [Time] lower bound
# @param end_time [Time] upper bound
# @return [String] the raw bytes of the selected lines; "" if no start line
#   was found or the end line lies before the start line
def rails_log_within(path, start_time, end_time)
  File.open(path, 'rb') do |file|
    start_bounds, end_bounds = [start_time, end_time].map do |time|
      binary_search_line(file) do |line|
        time_string = line[/\[(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+)/, 1]
        if time_string
          Time.iso8601(time_string) <=> time
        else
          :next_line
        end
      end
    end

    from = start_bounds && start_bounds.first
    next "" unless from # nothing at or after start_time

    file.seek(from, IO::SEEK_SET)
    upto = end_bounds && end_bounds.first
    next file.read unless upto # no line at or after end_time: take the rest

    # upto is the start offset of the end line, so the length is upto - from
    # (no +1, which would include the first byte of the end line). Clamp to 0
    # because IO#read rejects negative lengths.
    file.read([upto - from, 0].max)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment