Skip to content

Instantly share code, notes, and snippets.

@billdueber
Last active December 22, 2017 02:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save billdueber/e93c264f15c4ef598c3bb965c3962d18 to your computer and use it in GitHub Desktop.
Save billdueber/e93c264f15c4ef598c3bb965c3962d18 to your computer and use it in GitHub Desktop.
require 'traject'
require_relative 'recusive_json_reader'
require 'traject/debug_writer'
# Indexer-wide configuration: which reader and writer classes to use (given by
# class name) and the file name the output setting points at.
settings do
  # Reader class defined in the file pulled in via require_relative.
  store('reader_class_name', 'MyJsonHierarchyReader')

  # Writer class made available by requiring 'traject/debug_writer'.
  store('writer_class_name', 'Traject::DebugWriter')

  store('output_file', 'recursive.out')
end
# Copy the record's 'my_id' value, unchanged, into the 'renamed_id' output field.
to_field 'renamed_id', ->(rec, acc) { acc << rec['my_id'] }
# Emit the record's 'my_id', coerced with #to_i and incremented by one, into
# the 'id_plus_one' output field.
to_field('id_plus_one') do |rec, acc|
  incremented = rec['my_id'].to_i + 1
  acc << incremented
end
{"id":1,"children":[{"id":2,"children":[{"id":3},{"id":4}]},{"id":5,"children":[{"id":6},{"id":7}]}]}
{"id":1,"children":[{"id":2,"children":[{"id":3},{"id":4}]},{"id":5,"children":[{"id":6},{"id":7}]}]}
require 'json'
# Reader that flattens a hierarchy of JSON objects into a stream of flat
# documents. Each element of the input is parsed as one JSON object; that
# object and every object nested under its 'children' key are yielded as
# separate documents in depth-first, parent-before-children order.
class MyJsonHierarchyReader
  include Enumerable

  # @param input_stream [#each] Probably a file, but any iterator will do
  #   so long as #each yields one JSON object (as text) per element, e.g. a
  #   newline-delimited JSON file.
  # @param settings [Hash] accepted as the second constructor argument;
  #   not used by this reader.
  def initialize(input_stream, settings)
    @json_lines = input_stream
  end

  # Parse every non-blank element of the input and yield one document for
  # each node found in its hierarchy.
  #
  # @yieldparam doc [Hash] the document built by #build_a_doc
  # @return [Enumerator] when called without a block
  # @raise [JSON::ParserError] if a non-blank element is not valid JSON
  def each(&blk)
    return enum_for(:each) unless blk

    @json_lines.each do |jline|
      # Blank lines (e.g. a trailing newline at the end of an NDJSON file)
      # carry no record and would otherwise make JSON.parse raise.
      next if jline.to_s.strip.empty?

      recursive_yield(JSON.parse(jline), blk)
    end
  end

  # Yield the document for +h+, then recurse into each of its children.
  #
  # @param h [Hash] a parsed JSON object, optionally with a 'children' key
  # @param blk [#call] receiver for each built document
  def recursive_yield(h, blk)
    blk.call(build_a_doc(h))

    children = h['children']
    # Kernel#Array would explode a lone Hash into [key, value] pairs, so a
    # single child given as an object (not a list) is wrapped explicitly.
    children = [children] if children.is_a?(Hash)

    Array(children).each { |child| recursive_yield(child, blk) }
  end

  # Build the flat document emitted for one node of the hierarchy.
  #
  # @param h [Hash] a parsed JSON object
  # @return [Hash] document whose 'my_id' entry is the node's 'id' value
  def build_a_doc(h)
    { 'my_id' => h['id'] }
  end
end
traject -c indexer.rb records.ndj
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment