Last active
September 22, 2018 17:49
-
-
Save amolpujari/5966431 to your computer and use it in GitHub Desktop.
Example of parsing large XML files in Ruby using Ox: define a handler and look for a particular root element.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "awesome_print" | |
# Streaming XML helpers built on the Ox SAX parser.
module XmlParsing
  require "ox"

  # SAX handler that rebuilds every element as a nested Hash and hands each
  # completed +target+ element to +target_handler+.
  #
  # Shape of a rebuilt element:
  # * child elements are stored under their (String) tag name; repeated
  #   children with the same name are collected into an Array;
  # * attributes are stored as a Hash under the Symbol key +:attrs+;
  # * text is stored under the Symbol key +:text+;
  # * an element holding nothing but text is stored in its parent as the
  #   bare String instead of a Hash.
  class Reader < ::Ox::Sax
    # @param file_path [String] path of the XML file to read
    # @param target [String] tag name of the elements passed to the handler
    # @param target_handler [#next_element] called with each completed target element
    def initialize(file_path, target, target_handler)
      @target_handler = target_handler
      @target = target
      @file_path = file_path
      @elements = [] # stack of Hashes under construction, innermost last
      @names = []    # tag names of the open elements, parallel to @elements
    end

    # Number of target start tags in the file, counted with +grep+.
    #
    # Counts exact "<target>" occurrences first and, when there are none,
    # falls back to "<target" so tags carrying attributes are counted.
    # The result is memoized.
    #
    # @return [Integer]
    def count
      @count ||= begin
        exact = occurrences("<#{@target}>")
        exact.zero? ? occurrences("<#{@target}") : exact
      end
    end

    # Runs the SAX parse over the file with this object as the handler.
    # The file is closed when parsing ends, including when the handler
    # raises or exits.
    def parse
      File.open(@file_path) { |xmlio| Ox.sax_parse(self, xmlio) }
    end

    # SAX callback: a start tag was read. Opens a fresh Hash for the element.
    def start_element(name)
      # The tag name lives on its own stack so it can never collide with a
      # child element that happens to share the name.
      @names.push(name.to_s.strip)
      @elements.push({})
    end

    # SAX callback: an end tag was read. Closes the innermost element, stores
    # it in its parent and notifies the handler when it is a target element.
    def end_element(name)
      name = name.to_s.strip
      # Ignore end tags that do not close the innermost open element
      # (this also covers an empty stack).
      return unless @names.last == name

      @names.pop
      element = @elements.pop
      text_only = element.size == 1 && element.key?(:text)
      inject_into_last(name, text_only ? element[:text] : element)
      # The handler always receives the Hash form, even for text-only elements.
      @target_handler.next_element(element) if @target == name
    end

    # Stores +value+ under +name+ in the innermost open element, turning the
    # entry into an Array once the same name is seen more than once.
    # Does nothing when no element is open.
    def inject_into_last(name, value)
      parent = @elements.last
      return unless parent

      if parent.key?(name)
        parent[name] = [parent[name]] unless parent[name].is_a?(Array)
        parent[name].push(value)
      else
        parent[name] = value
      end
    end

    # SAX callback: an attribute of the innermost open element was read.
    def attr(name, value)
      current = @elements.last
      return unless current

      current[:attrs] ||= {}
      current[:attrs][name.to_s.strip] = value.to_s.strip
    end

    # SAX callback: text inside the innermost open element was read.
    # A later text chunk replaces an earlier one for the same element.
    def text(value)
      current = @elements.last
      return unless current

      current[:text] = value.to_s.strip
    end

    private

    # Counts literal occurrences of +pattern+ in the file using +grep -o -F+.
    # The command is passed as an argument list, so no shell is involved and
    # the pattern and path need no quoting.
    def occurrences(pattern)
      command = ["grep", "-o", "-F", "--", pattern, @file_path.to_s]
      IO.popen(command) { |io| io.each_line.count }
    end
  end
end
# Example handlers for elements emitted by XmlParsing::Reader.
module XMLPropertiesHandler
  # Demo handler: pretty-prints the first element it receives and then
  # terminates the program.
  class Premthus
    # @param property [Object] the element to print with awesome_print's +ap+
    # @return [void] does not return; the process exits after printing
    def next_element(property)
      ap(property)
      Kernel.exit
    end
  end
end
# Demo run: parse "mits.xml" and pass each "Property" element to the handler.
property_handler = XMLPropertiesHandler::Premthus.new
XmlParsing::Reader.new("mits.xml", "Property", property_handler).parse
# | |
# | |
# | |
# Below is a Nokogiri example that tries the easiest way to deal with Nokogiri SAX parsing:
# http://amolnpujari.wordpress.com/2012/03/31/reading_huge_xml-rb/ | |
# | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment