Created
January 17, 2012 00:37
-
-
Save chl/1623822 to your computer and use it in GitHub Desktop.
Select fields (at depth) from one-record-per-line JSON streams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Select fields (at depth) from one-record-per-line JSON streams.

Reads one JSON document per line from stdin, follows every path expression
given on the command line into that document, and writes the selected values
to stdout as one delimited record per input line.

A path expression is split on the ``-d`` delimiter. A component consisting of
the ``-x`` prefix followed only by digits (e.g. ``@0``) is used as an integer
index; every other component is used as a string key.

NOTE: this is a Python 2 script -- the non-JSON output mode relies on the
``unicode`` builtin.
"""
import sys
import argparse
import re
import simplejson

parser = argparse.ArgumentParser(description="Select fields (at depth) from one-record-per-line JSON streams")
parser.add_argument("-d", default=".", help="Path delimiter (default: .)")
parser.add_argument("-x", default="@", help="Array indexing prefix (default: @)")
parser.add_argument("-f", default="\t", help="Output field separator (default: tab)")
parser.add_argument("-r", default="\n", help="Output end-of-record signifier (default: newline)")
parser.add_argument("-n", default="", help="Output null value (default: empty string/null)")
parser.add_argument("-e", default="utf-8", help="Output encoding (default: utf-8)")
parser.add_argument("-j", default=False, action="store_true", help="JSON-encode output fields (default: False)")
parser.add_argument("path", nargs="+", help="Path expressions (e.g. foo.bar a.b.@0.c)")
args = parser.parse_args()

# The prefix is user-supplied, so escape it: otherwise a prefix such as "$"
# or "." would be interpreted as a regex metacharacter.
index_pattern = re.compile("^%s[0-9]+$" % re.escape(args.x))

# Each path becomes a list of keys: ints for array indexes, strings otherwise.
paths = [
    [
        int(part[len(args.x):]) if index_pattern.match(part) else part
        for part in path.split(args.d)
    ]
    for path in args.path
]

if args.j:
    # JSON output: fields are JSON-encoded, and -n (when given) is itself
    # parsed as JSON; when -n is empty the null marker is JSON null.
    encode = simplejson.dumps
    null = simplejson.loads(args.n) if args.n else None
else:
    # Plain output: fields are stringified and encoded with the -e encoding,
    # and -n is used verbatim as the null marker.
    def encode(value):
        return unicode(value).encode(args.e)

    null = args.n


def select(data, path):
    """Return the value reached by following ``path`` into ``data``.

    Returns None as soon as a component cannot be resolved (missing key,
    index out of range, or a value that cannot be indexed that way).
    """
    value = data
    for key in path:
        try:
            value = value[key]
        except (KeyError, IndexError, TypeError):
            return None
    return value


try:
    for line in sys.stdin:
        fields = []
        try:
            data = simplejson.loads(line)
        except simplejson.decoder.JSONDecodeError:
            # Best effort: a line that is not valid JSON still produces an
            # (empty) record, so output records stay aligned with input lines.
            pass
        else:
            for path in paths:
                value = select(data, path)
                # Substitute the null marker only for missing/null values;
                # legitimate falsy values (0, false, "", [], {}) are kept.
                fields.append(encode(null if value is None else value))
        sys.stdout.write(args.f.join(fields) + args.r)
except IOError:
    # Typically a closed pipe on stdout (e.g. piping into `head`).
    sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment