tyrannosaur/**json2*sv.py**

## json2*sv.py
#!/usr/bin/env python

import os
import sys
import json
import re

def iter_obj(obj):
   """\
   Build a list of (object, namespace) tuples.

   An object is a non-dict value found anywhere inside ``obj``, and a
   namespace is the list of keys (in order) required to reach that value
   from the root object.  Nested dicts are descended into; every other
   value (lists included) is reported as a leaf.

   The work queue is always a real list rather than the result of
   ``zip``, so the function behaves the same on Python 2 and Python 3.
   """

   def make_work(mapping, namespace):
      # One (value, path) pair per entry.  ``namespace + [key]`` builds a
      # fresh list, so sibling paths never share state.
      return [(value, namespace + [key]) for key, value in mapping.items()]

   # Queue of (value, namespace) pairs that still have to be examined
   work_queue = make_work(obj, [])
   results = []

   while work_queue:
      # Entries are taken from the tail; the children of a dict are put
      # at the head, so everything already queued is handled first.
      current, namespace = work_queue.pop()

      if isinstance(current, dict):
         work_queue = make_work(current, namespace) + work_queue
      else:
         results.append((current, namespace))

   return results

def val_from_namespace(obj, namespace):
   """\
   Look up the value that a namespace points at inside an object.

   Starting at ``obj``, each key of ``namespace`` is applied in order with
   ``[]`` indexing and the value reached last is returned.  An empty
   namespace returns ``obj`` itself.
   """

   node = obj
   for step in namespace:
      # Descend one level per key
      node = node[step]
   return node

def convert(name):
   """\
   Convert a JSON file holding a list of objects into a TSV file.

   The output is written next to the input, with the input's extension
   replaced by ``tsv``.  The column headers are the dotted key paths of
   the leaf values of the first object; every object in the list,
   including the first one, then becomes one row.  An empty list yields
   an empty output file.

   The input is parsed before the output file is created, so a malformed
   input does not leave an empty output file behind.  Errors from
   ``open`` and ``json.loads`` propagate, as does the lookup error raised
   when an object lacks one of the first object's key paths.
   """

   SEPARATOR = u'\t'
   EXTENSION = 'tsv'

   path, _ = os.path.splitext(name)
   out_name = '{0}.{1}'.format(path, EXTENSION)

   def clean(text):
      # Remove separator characters from a value so it cannot split a column
      return u'{0}'.format(text).replace(SEPARATOR, u'')

   def write_out(outfile, vals):
      line = SEPARATOR.join(vals)
      outfile.write(u'{0}\n'.format(line).encode('utf-8'))

   # Read-only access is enough for the input
   with open(name, 'rb') as infile:
      data = json.loads(infile.read())

   with open(out_name, 'wb') as outfile:
      if len(data) > 0:
         headers = [namespace for _, namespace in iter_obj(data[0])]
         write_out(outfile, [u'.'.join(header) for header in headers])

         # The first object supplies the headers and is also a data row
         for obj in data:
            values = [clean(val_from_namespace(obj, header)) for header in headers]
            write_out(outfile, values)

if __name__ == '__main__':
   # Everything after the script name is treated as an input file
   filenames = sys.argv[1:]

   if not filenames:
      # No input files given: show the usage line and stop
      print('usage {0} [file 1] [file 2] ...'.format(sys.argv[0]))
      sys.exit(0)

   for filename in filenames:
      convert(filename)

## json2*sv.py
#!/usr/bin/env python

import os
import sys
import json
import re

def iter_obj(obj):
   """\
   Build a list of (object, namespace) tuples.

   An object is a non-dict value found anywhere inside ``obj``, and a
   namespace is the list of keys (in order) required to reach that value
   from the root object.  Nested dicts are descended into; every other
   value (lists included) is reported as a leaf.

   The work queue is always a real list rather than the result of
   ``zip``, so the function behaves the same on Python 2 and Python 3.
   """

   def make_work(mapping, namespace):
      # One (value, path) pair per entry.  ``namespace + [key]`` builds a
      # fresh list, so sibling paths never share state.
      return [(value, namespace + [key]) for key, value in mapping.items()]

   # Queue of (value, namespace) pairs that still have to be examined
   work_queue = make_work(obj, [])
   results = []

   while work_queue:
      # Entries are taken from the tail; the children of a dict are put
      # at the head, so everything already queued is handled first.
      current, namespace = work_queue.pop()

      if isinstance(current, dict):
         work_queue = make_work(current, namespace) + work_queue
      else:
         results.append((current, namespace))

   return results

def val_from_namespace(obj, namespace):
   """\
   Look up the value that a namespace points at inside an object.

   Starting at ``obj``, each key of ``namespace`` is applied in order with
   ``[]`` indexing and the value reached last is returned.  An empty
   namespace returns ``obj`` itself.
   """

   node = obj
   for step in namespace:
      # Descend one level per key
      node = node[step]
   return node

def convert(name):
   """\
   Convert a JSON file holding a list of objects into a TSV file.

   The output is written next to the input, with the input's extension
   replaced by ``tsv``.  The column headers are the dotted key paths of
   the leaf values of the first object; every object in the list,
   including the first one, then becomes one row.  An empty list yields
   an empty output file.

   The input is parsed before the output file is created, so a malformed
   input does not leave an empty output file behind.  Errors from
   ``open`` and ``json.loads`` propagate, as does the lookup error raised
   when an object lacks one of the first object's key paths.
   """

   SEPARATOR = u'\t'
   EXTENSION = 'tsv'

   path, _ = os.path.splitext(name)
   out_name = '{0}.{1}'.format(path, EXTENSION)

   def clean(text):
      # Remove separator characters from a value so it cannot split a column
      return u'{0}'.format(text).replace(SEPARATOR, u'')

   def write_out(outfile, vals):
      line = SEPARATOR.join(vals)
      outfile.write(u'{0}\n'.format(line).encode('utf-8'))

   # Read-only access is enough for the input
   with open(name, 'rb') as infile:
      data = json.loads(infile.read())

   with open(out_name, 'wb') as outfile:
      if len(data) > 0:
         headers = [namespace for _, namespace in iter_obj(data[0])]
         write_out(outfile, [u'.'.join(header) for header in headers])

         # The first object supplies the headers and is also a data row
         for obj in data:
            values = [clean(val_from_namespace(obj, header)) for header in headers]
            write_out(outfile, values)

if __name__ == '__main__':
   # Everything after the script name is treated as an input file
   filenames = sys.argv[1:]

   if not filenames:
      # No input files given: show the usage line and stop
      print('usage {0} [file 1] [file 2] ...'.format(sys.argv[0]))
      sys.exit(0)

   for filename in filenames:
      convert(filename)

## sample.json
[
  { "_id" : { "$oid" : "507dc7905ef6dc0006e4cf83"} , "data" : { "power" : 1 } , "uploader" : "alice" , "created" : { "$date" : "2012-10-16T20:46:08.154Z"}}
 ,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf84"} , "data" : { "power" : 10} , "uploader" : "bob" , "created" : { "$date" : "2012-10-16T20:46:08.164Z"}}
 ,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf85"} , "data" : { "power" : 100} , "uploader" : "carol" , "created" : { "$date" : "2012-10-16T20:46:08.174Z"}}
 ,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf86"} , "data" : { "power" : 1000} , "uploader" : "david" , "created" : { "$date" : "2012-10-16T20:46:08.184Z"}}
]

## sample.tsv

          
            uploader
            created.$date
            data.power
            _id.$oid

            
              bob
              2012-10-16T20:46:08.164Z
              10
              507dc7905ef6dc0006e4cf84

            
              carol
              2012-10-16T20:46:08.174Z
              100
              507dc7905ef6dc0006e4cf85

            
              david
              2012-10-16T20:46:08.184Z
              1000
              507dc7905ef6dc0006e4cf86
	#!/usr/bin/env python

import os
import sys
import json
import re

def iter_obj(obj):
   """\
   Build a list of (object, namespace) tuples.

   An object is a non-dict value found anywhere inside ``obj``, and a
   namespace is the list of keys (in order) required to reach that value
   from the root object.  Nested dicts are descended into; every other
   value (lists included) is reported as a leaf.

   The work queue is always a real list rather than the result of
   ``zip``, so the function behaves the same on Python 2 and Python 3.
   """

   def make_work(mapping, namespace):
      # One (value, path) pair per entry.  ``namespace + [key]`` builds a
      # fresh list, so sibling paths never share state.
      return [(value, namespace + [key]) for key, value in mapping.items()]

   # Queue of (value, namespace) pairs that still have to be examined
   work_queue = make_work(obj, [])
   results = []

   while work_queue:
      # Entries are taken from the tail; the children of a dict are put
      # at the head, so everything already queued is handled first.
      current, namespace = work_queue.pop()

      if isinstance(current, dict):
         work_queue = make_work(current, namespace) + work_queue
      else:
         results.append((current, namespace))

   return results

def val_from_namespace(obj, namespace):
   """\
   Get a value from an object with the given namespace.

   Each key of ``namespace`` is applied in order with ``[]`` indexing,
   starting at ``obj``.  An empty namespace returns ``obj`` itself.
   """

   cur = obj
   for key in namespace:
      cur = cur[key]
   return cur

def convert(name):
   """\
   Convert a JSON file holding a list of objects into a TSV file.

   The output is written next to the input, with the input's extension
   replaced by ``tsv``.  The column headers are the dotted key paths of
   the leaf values of the first object; every object in the list,
   including the first one, then becomes one row.  An empty list yields
   an empty output file.

   The input is parsed before the output file is created, so a malformed
   input does not leave an empty output file behind.  Errors from
   ``open`` and ``json.loads`` propagate, as does the lookup error raised
   when an object lacks one of the first object's key paths.
   """

   SEPARATOR = u'\t'
   EXTENSION = 'tsv'

   path, _ = os.path.splitext(name)
   out_name = '{0}.{1}'.format(path, EXTENSION)

   def clean(text):
      # Remove separator characters from a value so it cannot split a column
      return u'{0}'.format(text).replace(SEPARATOR, u'')

   def write_out(outfile, vals):
      line = SEPARATOR.join(vals)
      outfile.write(u'{0}\n'.format(line).encode('utf-8'))

   # Read-only access is enough for the input
   with open(name, 'rb') as infile:
      data = json.loads(infile.read())

   with open(out_name, 'wb') as outfile:
      if len(data) > 0:
         headers = [namespace for _, namespace in iter_obj(data[0])]
         write_out(outfile, [u'.'.join(header) for header in headers])

         # The first object supplies the headers and is also a data row
         for obj in data:
            values = [clean(val_from_namespace(obj, header)) for header in headers]
            write_out(outfile, values)

if __name__ == '__main__':
   if len(sys.argv) <= 1:
      print('usage {0} [file 1] [file 2] ...'.format(sys.argv[0]))
      sys.exit(0)

   for name in sys.argv[1:]:
      convert(name)
	[
	{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf83"} , "data" : { "power" : 1 } , "uploader" : "alice" , "created" : { "$date" : "2012-10-16T20:46:08.154Z"}}
	,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf84"} , "data" : { "power" : 10} , "uploader" : "bob" , "created" : { "$date" : "2012-10-16T20:46:08.164Z"}}
	,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf85"} , "data" : { "power" : 100} , "uploader" : "carol" , "created" : { "$date" : "2012-10-16T20:46:08.174Z"}}
	,{ "_id" : { "$oid" : "507dc7905ef6dc0006e4cf86"} , "data" : { "power" : 1000} , "uploader" : "david" , "created" : { "$date" : "2012-10-16T20:46:08.184Z"}}
	]
uploader	created.$date	data.power	_id.$oid
bob	2012-10-16T20:46:08.164Z	10	507dc7905ef6dc0006e4cf84
carol	2012-10-16T20:46:08.174Z	100	507dc7905ef6dc0006e4cf85
david	2012-10-16T20:46:08.184Z	1000	507dc7905ef6dc0006e4cf86