Skip to content

Instantly share code, notes, and snippets.

@tenderlove
Created October 28, 2011 22:58
Show Gist options
  • Save tenderlove/1323792 to your computer and use it in GitHub Desktop.
Save tenderlove/1323792 to your computer and use it in GitHub Desktop.
###
# Read in XML as a stream, write out JSON as a stream. As little information
# is kept in memory as possible.
require 'nokogiri'
require 'psych'
###
# SAX document handler that forwards every XML event it receives to +emitter+
# as the matching Psych emitter event, so no tree of the document is built.
#
# Each element is emitted as a single-key mapping of the form
#
#   { name => { 'attributes' => { ... }, 'children' => [ ... ] } }
#
# The 'attributes' key is emitted only when the element has attributes. Text
# (including CDATA sections) is emitted as scalar entries of the enclosing
# element's 'children' sequence.
class JSONTranslator < Nokogiri::XML::SAX::Document
  # The object that receives the translated events.
  attr_reader :emitter

  # +emitter+ must respond to the Psych emitter events used below
  # (start_stream, start_document, scalar, start_mapping, ...).
  def initialize(emitter)
    @emitter = emitter
    super()
  end

  # Opens the output stream and the single output document.
  def start_document
    emitter.start_stream(Psych::Nodes::Stream::UTF8)
    emitter.start_document([], [], true)
  end

  # Closes the output document and then the output stream.
  def end_document
    emitter.end_document
    emitter.end_stream
  end

  # Opens the mappings for an element and leaves its 'children' sequence open;
  # everything is closed again by #end_element_namespace.
  #
  # Only +name+ and +attrs+ are used; +prefix+, +uri+ and +ns+ are ignored.
  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
    start_map            # { name => ...
    scalar(name)
    start_map            #   { 'attributes' => ..., 'children' => ...

    unless attrs.empty?
      scalar('attributes')
      mapping do
        # Attributes are keyed by local name only; any prefix is not included.
        attrs.each do |attr|
          scalar(attr.localname)
          scalar(attr.value)
        end
      end
    end

    scalar('children')
    start_sequence
  end

  # Emits a run of text as one scalar. The string is passed through as-is,
  # so whitespace-only runs between elements are emitted too.
  def characters(string)
    scalar(string)
  end

  # Emits the content of a CDATA section as one scalar, exactly like
  # #characters. Without this callback CDATA text would be dropped, because
  # the parser reports it separately from ordinary character data.
  def cdata_block(string)
    scalar(string)
  end

  # Closes the 'children' sequence and the two mappings opened by
  # #start_element_namespace.
  def end_element_namespace(name, prefix = nil, uri = nil)
    end_sequence
    end_map
    end_map
  end

  private

  # Emits +string+ as a scalar with no anchor and no tag.
  # Argument order: value, anchor, tag, plain, quoted, style.
  def scalar(string)
    emitter.scalar(string, nil, nil, false, true, Psych::Nodes::Scalar::PLAIN)
  end

  # Argument order: anchor, tag, implicit, style.
  def start_map
    emitter.start_mapping(nil, nil, false, Psych::Nodes::Mapping::BLOCK)
  end

  def end_map
    emitter.end_mapping
  end

  # Argument order: anchor, tag, implicit, style.
  def start_sequence
    emitter.start_sequence(nil, nil, false, Psych::Nodes::Sequence::BLOCK)
  end

  def end_sequence
    emitter.end_sequence
  end

  # Wraps the events emitted by the given block in a mapping.
  def mapping
    start_map
    yield
    end_map
  end
end
###
# +parser+ calls XML events on the +translator+. +translator+ calls JSON events
# on the +emitter+.
# The XML file to translate is the first command-line argument. Fail with a
# usage message instead of letting File.open raise a TypeError on nil.
path = ARGV.first
abort "usage: #{$PROGRAM_NAME} FILE.xml" unless path

# Wire the pipeline: the parser drives the translator, and the translator
# drives the emitter, which writes to $stdout.
emitter = Psych::JSON::Stream::Emitter.new($stdout)
translator = JSONTranslator.new(emitter)
parser = Nokogiri::XML::SAX::Parser.new(translator)

# The parser is handed the open file handle; the block form of File.open
# closes it afterwards.
File.open(path, 'rb') { |f| parser.parse(f) }
__END__
<one two="three">
four
<ul>
<li>
hi
<div>lol</div>
mom!
</li>
<li>hello world</li>
</ul>
</one>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment