Skip to content

Instantly share code, notes, and snippets.

@lelandbatey
Created November 8, 2021 23:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lelandbatey/a862b6f8bb0b707024c51083e56c8b08 to your computer and use it in GitHub Desktop.
Save lelandbatey/a862b6f8bb0b707024c51083e56c8b08 to your computer and use it in GitHub Desktop.
Parse Athena-formatted data into JSON
#!/usr/bin/env python3
# A script that reads Athena-formatted lines from stdin and prints each one as
# JSON. Assumes all fields and values are strings.
import json
import sys
import re
# Field name followed by '='. Names may not contain structural characters, so
# a malformed struct fails loudly instead of being mis-parsed.
_ATHENA_KEY = re.compile(r"([^={}\[\],]+)=")


def _parse_athena_scalar(text, pos, closer):
    """Read a bare string starting at ``pos``; return ``(value, end_index)``."""
    if closer is None:
        # A top-level scalar has no enclosing container, so it spans the line.
        return text[pos:], len(text)
    stops = [i for i in (text.find(", ", pos), text.find(closer, pos)) if i != -1]
    # With no terminator in sight the container is unterminated; consume the
    # rest and let the caller report the missing closer.
    end = min(stops) if stops else len(text)
    return text[pos:end], end


def _parse_athena_struct(text, pos):
    """Parse ``key=value, ...}`` just after a ``{``; return ``(dict, end_index)``."""
    fields = {}
    if text.startswith("}", pos):
        return fields, pos + 1
    while True:
        match = _ATHENA_KEY.match(text, pos)
        if match is None:
            raise ValueError(f"expected 'key=' at offset {pos} in {text!r}")
        value, pos = _parse_athena_value(text, match.end(), "}")
        fields[match.group(1)] = value
        if text.startswith(", ", pos):
            pos += 2
        elif text.startswith("}", pos):
            return fields, pos + 1
        else:
            raise ValueError(f"expected ', ' or '}}' at offset {pos} in {text!r}")


def _parse_athena_array(text, pos):
    """Parse ``item, ...]`` just after a ``[``; return ``(list, end_index)``."""
    items = []
    if text.startswith("]", pos):
        return items, pos + 1
    while True:
        value, pos = _parse_athena_value(text, pos, "]")
        items.append(value)
        if text.startswith(", ", pos):
            pos += 2
        elif text.startswith("]", pos):
            return items, pos + 1
        else:
            raise ValueError(f"expected ', ' or ']' at offset {pos} in {text!r}")


def _parse_athena_value(text, pos, closer):
    """Dispatch on the first character: struct, array, or bare string."""
    if text.startswith("{", pos):
        return _parse_athena_struct(text, pos + 1)
    if text.startswith("[", pos):
        return _parse_athena_array(text, pos + 1)
    return _parse_athena_scalar(text, pos, closer)


def parseathena(line):
    """Convert one Athena-formatted line into a JSON document string.

    The accepted notation is ``{key=value, ...}`` for structs and
    ``[item, ...]`` for arrays, nested arbitrarily; every leaf is treated as
    a string. Unlike a purely textual rewrite, leaves may be empty, may start
    or end with punctuation, and may contain ``=``, ``"`` or ``\\``; they are
    escaped properly by ``json.dumps``.

    Limitation: the notation is ambiguous, so a leaf cannot contain ``, `` or
    the closing bracket of its enclosing container.

    Args:
        line: A single line of Athena-formatted text.

    Returns:
        A JSON string. Blank input yields ``"{}"``.

    Raises:
        ValueError: If the line is not well-formed (unterminated container,
            missing ``key=``, or trailing text after the top-level value).
    """
    text = line.strip()
    if not text:
        return "{}"
    value, end = _parse_athena_value(text, 0, None)
    if end != len(text):
        raise ValueError(f"unexpected trailing text at offset {end} in {text!r}")
    # These separators reproduce the layout the regex-based version emitted
    # (`"k":"v", "k2":"v2"`); ensure_ascii=False keeps non-ASCII text verbatim.
    return json.dumps(value, separators=(", ", ":"), ensure_ascii=False)
def main():
    """Translate each non-blank stdin line to JSON and print it to stdout.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped. The converted text is round-tripped through
    ``json.loads``/``json.dumps`` so that malformed output raises instead of
    being printed.
    """
    stripped_lines = (raw.strip() for raw in sys.stdin)
    for record in filter(None, stripped_lines):
        parsed = json.loads(parseathena(record))
        print(json.dumps(parsed))
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment