Skip to content

Instantly share code, notes, and snippets.

@tgecho
Last active January 9, 2020 02:59
Show Gist options
  • Save tgecho/ddfab4dca5b0aff8da7afe97fcfa69b6 to your computer and use it in GitHub Desktop.
Save tgecho/ddfab4dca5b0aff8da7afe97fcfa69b6 to your computer and use it in GitHub Desktop.
A simple jq imitator that uses python expressions for filtering
#!/usr/bin/env python3
import json
import sys
import argparse
import re
import pprint
# Command-line interface definition. `main()` consumes it via `parser.parse_args()`.
parser = argparse.ArgumentParser()

# Positional: the expression evaluated against the loaded data.
parser.add_argument(
    "code",
    help=(
        "A valid python expression. It will be passed `d` and `data` variables\n"
        "containing the deserialized contents of the input data. In addition, the\n"
        "deserialized objects are wrapped in a simple dict subclass that allows\n"
        "using attributes as well as standard keyed access (e.g. data.foo == data['foo'])."
    ),
)

# Optional positional: where the JSON comes from (stdin when omitted).
parser.add_argument(
    "file",
    nargs="?",
    default=sys.stdin,
    type=argparse.FileType("r"),
    help="Input data as a file path (will also accept stdin). Must be valid json.",
)

# Where the result is written (stdout when omitted).
parser.add_argument(
    "--out", "-o",
    default=sys.stdout,
    type=argparse.FileType("w"),
)

# Repeatable; the values are collected into a list stored under the name
# "import", which is a keyword and therefore must be read with getattr().
parser.add_argument(
    "--import", "-i",
    action="append",
    help="Add any importable modules to the scope of your expression. Can be repeated as needed.",
)

# Output formatting switches.
parser.add_argument(
    "--sort-keys",
    action="store_true",
    help="Sort object keys when printing the output.",
)
parser.add_argument(
    "--indent",
    type=int,
    help="Pretty print the output and indent by this many spaces.",
)
parser.add_argument(
    "--raw-out", "-r",
    action="store_true",
    help="Use pprint (if indented) or str() instead of json. Most useful if you want to get a bare string.",
)
def bail(msg):
    """Report *msg* on stderr and terminate the process with exit status 1.

    Never returns: it always raises ``SystemExit``.
    """
    print(msg, file=sys.stderr)
    raise SystemExit(1)
def main():
    """Entry point: read JSON, evaluate the user's expression on it, write the result.

    Steps:
      1. Parse the command line with the module-level ``parser``.
      2. Load the input as JSON, turning every JSON object into an ``AttrDict``.
      3. Import the modules requested with ``--import``.
      4. ``eval`` the expression with ``d``/``data`` and the imported modules
         in scope.
      5. Write the result as JSON, or with ``pprint``/``str()`` when
         ``--raw-out`` is set.

    Every handled failure is reported through ``bail`` (message on stderr,
    exit status 1).
    """
    args = parser.parse_args()

    try:
        original = json.load(args.file, object_hook=AttrDict)
    except json.decoder.JSONDecodeError as json_err:
        bail(f'Failed to parse JSON. {json_err}')
    except IOError as io_err:
        bail(f'Input: {io_err}')

    # A single namespace is used for the evaluation. Passing the data as a
    # separate `locals` mapping hides it from lambdas and generator
    # expressions inside the user's code, which resolve free names through
    # globals only.
    scope = {}
    try:
        for name in getattr(args, 'import') or ():
            # `__import__('a.b')` returns the top-level package `a` (with `a.b`
            # loaded), so bind it under `a` to make `a.b.thing` usable.
            scope[name.partition('.')[0]] = __import__(name)
    except ImportError as import_err:
        bail(import_err)
    # Added last so `d`/`data` win over an imported module of the same name.
    scope.update(data=original, d=original)

    try:
        # NOTE(security): evaluating the command-line expression is the whole
        # purpose of this tool; never pass it an expression from an untrusted
        # source.
        transformed = eval(args.code, scope)
    except NameError as name_err:
        msg = f'Transform exception: {name_err}'
        if not re.search(r'\b(d|data)\b', args.code):
            msg += '\nDid you mean to reference `d` or `data`?'
        bail(msg)
    except Exception as eval_err:
        bail(f'Transform exception: {eval_err}')

    try:
        if not args.raw_out:
            json.dump(
                transformed,
                args.out,
                indent=args.indent or None,
                # Without indentation, emit the most compact form (no spaces
                # after ',' and ':'); otherwise keep the json defaults.
                separators=None if args.indent else (',', ':'),
                sort_keys=args.sort_keys,
            )
        elif args.indent:
            pprint_kwargs = {
                'stream': args.out,
                'indent': args.indent,
                'compact': True,
            }
            # `sort_dicts` is only accepted by pprint on Python 3.8+.
            if sys.version_info >= (3, 8):
                pprint_kwargs['sort_dicts'] = args.sort_keys
            pprint.pprint(transformed, **pprint_kwargs)
        else:
            args.out.write(str(transformed))
    except (TypeError, ValueError) as encode_err:
        # e.g. the expression produced something json cannot serialize.
        bail(f'Output: {encode_err}')
    except IOError as io_err:
        bail(f'Output: {io_err}')
class AttrDict(dict):
    """A ``dict`` whose keys can also be read and written as attributes.

    ``obj.foo`` is equivalent to ``obj['foo']``. ``__getattr__`` only runs
    after normal attribute lookup fails, so a key that collides with a real
    ``dict`` attribute (``items``, ``keys``, ...) must still be read with
    ``obj['items']``.
    """

    def __getattr__(self, key):
        """Return ``self[key]``; raise ``AttributeError`` if the key is absent."""
        try:
            return self[key]
        except KeyError:
            # getattr() with a default, hasattr() and the copy module all rely
            # on AttributeError to detect a missing attribute; a KeyError
            # would propagate through them as a crash.
            raise AttributeError(
                f'{type(self).__name__!r} object has no attribute {key!r}'
            ) from None

    def __setattr__(self, key, value):
        """Store *value* under *key* in the mapping itself."""
        self[key] = value
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
$ echo '[{"foo": "bar"},{"foo":"frum","alpha":"one"}]' | dt '[i.foo for i in d]'
["bar","frum"]
$ dt '[i["foo"] for i in d]' foo.json
["bar","frum"]
$ echo '{"why": "{\"did\": \"we embed json?\"}"}' | dt 'json.loads(d.why)' --import json
{"did":"we embed json?"}
$ echo '{"a": "string"}' | dt 'd.a'
"string"
$ echo '{"a": "string"}' | dt 'd.a' --raw-out
string
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment