Last active
August 29, 2015 14:05
-
-
Save bf4/fbb5fd30c442d83a9fb1 to your computer and use it in GitHub Desktop.
streaming file, ripped off of ruby's csv lib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Streaming logic adapted from Ruby's CSV library: https://github.com/ruby/ruby/blob/trunk/lib/csv.rb
# Reports the encoding of the stream held in @io.
#
# Probes @io in order: an object exposing #internal_encoding answers with its
# internal encoding (falling back to its external one), a StringIO answers
# with the encoding of its backing string, and anything exposing #encoding
# answers with that. Only when none of these apply is +default+ returned.
#
# @param default [Encoding, nil] value returned when @io offers no encoding
# @return [Encoding, nil]
def raw_encoding(default = Encoding::ASCII_8BIT)
  return @io.internal_encoding || @io.external_encoding if @io.respond_to?(:internal_encoding)
  return @io.string.encoding if @io.is_a?(StringIO)
  return @io.encoding if @io.respond_to?(:encoding)

  default
end
# Escapes +str+ for literal use inside a regular expression.
#
# Every substring matched by @re_chars is replaced by itself prefixed with
# @re_esc; all other characters are left untouched.
#
# @param str [String] text to escape
# @return [String] a new, escaped string
def escape_re(str)
  str.gsub(@re_chars) do |special|
    @re_esc + special
  end
end
# Transcodes each chunk to @encoding and concatenates the results.
#
# @param chunks [Array<String>] pieces to transcode and join
# @return [String] the joined string
def encode_str(*chunks)
  transcoded = chunks.map do |piece|
    piece.encode(@encoding.name)
  end
  transcoded.join('')
end
# Builds a Regexp whose source is the given chunks after they have been
# transcoded and joined by encode_str.
#
# @param chunks [Array<String>] regexp source fragments
# @return [Regexp]
def encode_re(*chunks)
  pattern_source = encode_str(*chunks)
  Regexp.new(pattern_source)
end
# Reads and parses the next row from +io+.
#
# Modelled on CSV#shift, which keeps reading until it finds the line ending
# and then returns the parsed line. This version is simpler because embedded
# \r or \n values inside quoted fields are not expected.
#
# One row is read up to @row_sep, the trailing separator is stripped with
# @line_end, and the remainder is handed to @parser.parse.
#
# @param io [#gets] stream to read from
# @return [Object, Symbol, nil] the parsed row; :parse_error when the parser
#   raised one of @parser_exceptions (the error is reported on STDERR);
#   nil when the stream has no more rows
def next_line(io)
  raw = io.gets(@row_sep)
  return nil unless raw

  raw.sub!(@line_end, "")
  begin
    @parser.parse(raw)
  rescue *@parser_exceptions => error
    STDERR.puts "#{error.class}: #{error.message}"
    :parse_error # i.e. line has content, not eos, but is not parseable
  end
end
# Detects the row separator used by +io+ by sampling its first line.
#
# One line is read with #gets and searched for the first "\r\n", "\r" or
# "\n". The stream is then put back exactly where it was found.
#
# The position is restored with an explicit seek instead of #rewind: a file
# opened with a "bom|utf-8" mode has already consumed its byte order mark,
# and rewinding to byte 0 would hand the BOM back as part of the first row.
#
# @param io [#gets, #pos, #seek, #lineno, #lineno=] a seekable stream
# @return [String, nil] the separator found, or nil when the sample contains
#   none (including an empty stream)
def get_row_sep(io)
  start_pos = io.pos
  start_lineno = io.lineno

  sample = io.gets
  # String#[] with a Regexp returns the matched text (or nil), which avoids
  # relying on the $& global.
  row_sep = sample && sample[encode_re("\r\n?|\n")]

  io.seek(start_pos)
  io.lineno = start_lineno
  row_sep
end
# Builds the pattern used to strip the row separator from the end of a line.
#
# For a separator the result matches that separator (regexp-escaped via
# escape_re) anchored at the end of the string. When no separator was
# detected (+row_sep+ is nil, e.g. an empty input or a single unterminated
# line) the result matches only the empty end-of-string position, so
# stripping with it is a no-op instead of raising on nil.
#
# @param row_sep [String, nil] detected row separator
# @return [Regexp]
def get_line_end(row_sep)
  return encode_re("\\z") if row_sep.nil?

  encode_re(escape_re(row_sep), "\\z")
end
# Prepares the parsing state for a stream: encoding, regexp-escaping helpers,
# row separator and line-end pattern.
#
# Note that the encoding is taken from raw_encoding, which inspects @io
# rather than the +io+ argument; +io+ is only used to sample the row
# separator.
#
# @param io [IO] stream passed to get_row_sep
# @return [Regexp] the line-end pattern stored in @line_end
def init(io)
  @encoding = raw_encoding(nil) ||
              Encoding.default_internal ||
              Encoding.default_external ||
              Encoding::ASCII_8BIT

  # Fall back to an empty escape string when a backslash cannot be
  # represented in the target encoding.
  @re_esc = begin
    "\\".encode(@encoding)
  rescue StandardError
    ""
  end

  # Character class of everything escape_re must escape.
  special_chars = "[-\\]\\[\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding)
  @re_chars = /#{special_chars}/

  @row_sep = get_row_sep(io)
  @line_end = get_line_end(@row_sep)
end
# Opens +filename+ for streaming and prepares the parsing state for it.
#
# The file is opened read-only as UTF-8 with a leading byte order mark
# skipped, stored in @io, and passed to init. If init fails the handle is
# closed before the error is re-raised, so a failed setup does not leak an
# open file.
#
# @param filename [String] path of the file to stream
# @return [File] the open stream (also stored in @io)
# @raise [SystemCallError] when the file cannot be opened
def open_stream(filename)
  @io = File.open(filename, 'r:bom|utf-8')
  begin
    init(@io)
  rescue StandardError
    @io.close
    raise
  end
  @io
end
# Yields each row produced by next_line until it returns a falsy value,
# then closes the stream.
#
# The stream is closed even when the block or the parser raises.
#
# @param stream [#close] stream handed to next_line
# @yield [row] every truthy value returned by next_line
# @return [nil]
def each_line(stream)
  row = next_line(stream)
  while row
    yield row
    row = next_line(stream)
  end
ensure
  stream.close
end
# Example driver: stream a log file holding one JSON document per line and
# print the '@timestamp' entry of every row.
require 'json'

# Parser used by next_line, and the errors it treats as "unparseable line".
@parser = JSON
@parser_exceptions = [JSON::ParserError]

log = open_stream('./json_log.log')
# NOTE: next_line yields :parse_error for rows the parser rejects, so such
# rows also reach this block.
each_line(log) { |entry| puts entry['@timestamp'] }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment