Skip to content

Instantly share code, notes, and snippets.

@chl
Created January 17, 2012 00:37
Show Gist options
  • Save chl/1623822 to your computer and use it in GitHub Desktop.
Save chl/1623822 to your computer and use it in GitHub Desktop.
Select fields (at depth) from one-record-per-line JSON streams
#!/usr/bin/env python
import sys
import argparse
import re
import simplejson
# Text type on either major version: ``unicode`` on Python 2, ``str`` on Python 3.
TEXT_TYPE = type(u"")


def parse_path(expression, delimiter=".", index_prefix="@"):
    """Split a path expression into a list of lookup steps.

    Segments are separated by ``delimiter``.  A segment made of
    ``index_prefix`` followed only by ASCII digits becomes an ``int``
    (an array index); every other segment stays a string (an object key).

    >>> parse_path("a.b.@0.c")
    ['a', 'b', 0, 'c']
    """
    # The prefix is user-supplied, so escape it: characters such as "$" or "."
    # would otherwise be interpreted as regex syntax.
    index_re = re.compile(r"^%s([0-9]+)$" % re.escape(index_prefix))
    steps = []
    for segment in expression.split(delimiter):
        match = index_re.match(segment)
        steps.append(int(match.group(1)) if match else segment)
    return steps


def select(data, path):
    """Follow ``path`` (as produced by ``parse_path``) into ``data``.

    Returns the value found, or ``None`` as soon as a step cannot be
    resolved (missing key, index out of range, or a value that does not
    support that kind of subscript).
    """
    value = data
    for step in path:
        try:
            value = value[step]
        except (KeyError, IndexError, TypeError):
            return None
    return value


def main():
    """Read one JSON record per line from stdin and write the selected fields."""
    parser = argparse.ArgumentParser(description="Select fields (at depth) from one-record-per-line JSON streams")
    parser.add_argument("-d", default=".", help="Path delimiter (default: .)")
    parser.add_argument("-x", default="@", help="Array indexing prefix (default: @)")
    parser.add_argument("-f", default="\t", help="Output field separator (default: tab)")
    parser.add_argument("-r", default="\n", help="Output end-of-record signifier (default: newline)")
    parser.add_argument("-n", default="", help="Output null value (default: empty string/null)")
    parser.add_argument("-e", default="utf-8", help="Output encoding (default: utf-8)")
    parser.add_argument("-j", default=False, action="store_true", help="JSON-encode output fields (default: False)")
    parser.add_argument("path", nargs="+", help="Path expressions (e.g. foo.bar a.b.@0.c)")
    args = parser.parse_args()

    if not args.d:
        # str.split("") raises ValueError; report it as a usage error instead.
        parser.error("path delimiter (-d) must not be empty")

    paths = [parse_path(expression, args.d, args.x) for expression in args.path]

    # With -j the null marker is itself parsed as JSON (JSON null when empty);
    # without -j it is emitted verbatim.
    if args.j:
        null = simplejson.loads(args.n) if args.n else None
    else:
        null = args.n

    def to_bytes(text):
        # Byte strings (Python 2 argv values, Python 2 dumps() output) pass
        # through untouched; text is encoded with the requested encoding.
        return text if isinstance(text, bytes) else text.encode(args.e)

    def encode(value):
        rendered = simplejson.dumps(value) if args.j else TEXT_TYPE(value)
        return to_bytes(rendered)

    separator = to_bytes(args.f)
    terminator = to_bytes(args.r)
    # Records are assembled as bytes, so on Python 3 they must go to the
    # underlying binary stream; Python 2's sys.stdout accepts them directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)

    try:
        for line in sys.stdin:
            fields = []
            try:
                data = simplejson.loads(line)
            except simplejson.decoder.JSONDecodeError:
                # Best effort: a malformed line still yields an (empty) record.
                pass
            else:
                for path in paths:
                    value = select(data, path)
                    # Only a missing/null value is replaced; falsy values such
                    # as 0, false or "" are real data and must be kept.
                    fields.append(encode(null if value is None else value))
            out.write(separator.join(fields) + terminator)
    except IOError:
        # Typically a closed pipe downstream (e.g. `| head`): exit quietly.
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment