Skip to content

Instantly share code, notes, and snippets.

@raymondberg
Created June 3, 2015 22:11
Show Gist options
  • Save raymondberg/de673499294eb9e91bb1 to your computer and use it in GitHub Desktop.
Save raymondberg/de673499294eb9e91bb1 to your computer and use it in GitHub Desktop.
require 'json/stream'
# Streams a JSON document from disk in small chunks and hands each element of
# one target array to #_extract_object as soon as that element is complete,
# so the whole document never has to be held in memory at once.
#
# The target array is selected with a slash-separated list of keys (called
# "xpath" here). Known limitation: segments are matched, in order, against
# every item the parser reports (keys AND scalar values) without tracking
# nesting depth, so a value or nested key equal to a segment also matches.
#
# The underscore-prefixed methods are public only because the parser
# callbacks invoke them from outside the instance; treat them as internal.
class MonsterParser
  # Number of bytes read from the file per chunk fed to the parser.
  FILE_BUFFER_SIZE = 1024

  # @param file_path [String] path of the JSON file to stream
  # @param xpath [String] slash-separated keys leading to the target array,
  #   e.g. "customers" or "data/customers"; empty segments (leading or
  #   trailing "/") are ignored
  # @raise [SystemCallError] if the file cannot be opened
  def initialize(file_path, xpath)
    @element_stack = []
    # Reversed so the next segment to match is always on top of the stack.
    @xpath_stack = xpath.split('/').reject(&:empty?).reverse
    @in_customer_array = false
    @finished_customer_array = false
    _configure_parser
    # Opened last so a failure above cannot leak the handle.
    @file_obj = File.open(file_path, 'r')
  end

  # Handles one item reported by the parser (a fresh {} or [], a key, or a
  # scalar value). Depending on the current phase the item is used to
  # advance the xpath search, to mark the start of the target array, or is
  # pushed onto the element stack.
  #
  # @param item [Object]
  # @raise [ArgumentError] if the item following the last xpath segment is
  #   not an Array
  def _stack_push(item)
    if @xpath_stack.any?
      # Still searching: consume the segment only when this item matches it.
      @xpath_stack.pop if item == _get_top(@xpath_stack)
    elsif !@in_customer_array
      raise ArgumentError, 'XPath destination must target a JSON array' unless item.is_a?(Array)

      @in_customer_array = true
    elsif !@finished_customer_array
      @element_stack.push(item)
    end
  end

  # @param stack [Array]
  # @return [Object, nil] the last element of +stack+, or nil when it is empty
  def _get_top(stack)
    stack.last
  end

  # Receives each completed element of the target array. Prints it; override
  # or replace to do something useful with the element.
  #
  # @param obj [Object] a fully assembled element
  def _extract_object(obj)
    puts "Dumping a full object: #{obj}"
  end

  # Folds the completed value on top of the element stack into its parent:
  # stored under the pending key when the parent is a Hash, appended when the
  # parent is an Array, or emitted via #_extract_object when it has no parent
  # (i.e. it is a direct element of the target array).
  def _stack_merge_up
    # Events before the target array opens, or after it closed, are not ours.
    return unless @in_customer_array
    return if @finished_customer_array

    # Nothing pending means this event is the target array's own closing
    # bracket. Checking emptiness (rather than a nil value) keeps JSON null
    # elements from being mistaken for the end of the array.
    if @element_stack.empty?
      @finished_customer_array = true
      return
    end

    value = @element_stack.pop
    if @element_stack.empty?
      _extract_object(value)
    elsif _get_top(@element_stack).is_a?(String)
      # Scalars are merged immediately after being pushed, so a String left
      # on the stack is always a pending Hash key.
      key = @element_stack.pop
      _get_top(@element_stack)[key] = value
    else
      _get_top(@element_stack) << value
    end
  end

  # Builds the JSON::Stream parser and routes its events to this instance.
  def _configure_parser
    # The callbacks are registered with bare calls inside the block, so the
    # block presumably runs with the parser as self (TODO confirm against the
    # json-stream docs); capture this instance in a local to reach it.
    monster = self
    @parser = JSON::Stream::Parser.new do
      start_object { monster._stack_push({}) }
      start_array  { monster._stack_push([]) }
      end_object   { monster._stack_merge_up }
      end_array    { monster._stack_merge_up }
      key          { |k| monster._stack_push(k) }
      value do |v|
        # A scalar is complete the moment it is seen: push, then merge.
        monster._stack_push(v)
        monster._stack_merge_up
      end
    end
  end

  # Reads the file in FILE_BUFFER_SIZE-byte chunks and feeds each chunk to the
  # parser. The file is closed afterwards, even when parsing raises; calling
  # this again on the same instance is a no-op.
  #
  # @return [nil]
  def load_data
    return if @file_obj.closed?

    begin
      while (chunk = @file_obj.read(FILE_BUFFER_SIZE))
        @parser << chunk
      end
    ensure
      @file_obj.close
    end
  end
end
# Run the demo only when this file is executed directly, not when required.
if $PROGRAM_NAME == __FILE__
  input_path = "customer100000.txt"
  target_key = "customers"
  MonsterParser.new(input_path, target_key).load_data
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment