Created
January 21, 2019 00:54
-
-
Save a-y-khan/a71c4cddb0b9bc43a81d9f34f25f8c4c to your computer and use it in GitHub Desktop.
Naive parsing and output of JSON attributes to CSV.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import ijson | |
def test_ijson(filename):
    """Stream three attributes of every JSON record into ``output.csv``.

    The input is parsed incrementally with ``ijson.parse`` so the whole
    document never has to be held in memory.  One CSV row is written each
    time an object under the ``records`` array is closed; the row holds
    whatever values were seen for that object's ``author``,
    ``author_flair_text`` and ``body`` attributes (``None`` -> empty cell
    when an attribute was absent).

    Args:
        filename: Path of the JSON file to read.  Assumed to have the shape
            ``{"records": [{...}, ...]}`` -- only prefixes under
            ``records.item`` are looked at.

    Side effects:
        Creates/overwrites ``output.csv`` in the current working directory
        and prints every parser event to stdout.
    """
    fieldnames = [
        'records.item.author',
        'records.item.author_flair_text',
        'records.item.body',
    ]
    record = dict.fromkeys(fieldnames)

    # Both files are managed by the ``with`` statement so they are closed
    # even if parsing fails part-way.  The input is opened in binary mode,
    # which is what ijson expects (it does the UTF-8 decoding itself);
    # ``newline=''`` is required by the csv module so that row terminators
    # and embedded newlines are written correctly on every platform.
    with open(filename, 'rb') as json_file, \
            open('output.csv', mode='w', newline='',
                 encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for prefix, event, value in ijson.parse(json_file):
            print(prefix, event, value)
            if prefix in record:
                # Exact match only: a suffix test would also accept e.g. a
                # nested ``...parent.author`` and add a key that DictWriter
                # rejects with ValueError.
                record[prefix] = value
            elif prefix == 'records.item' and event == 'end_map':
                # End of one record object: flush it and start afresh so
                # values never leak into the next row.
                writer.writerow(record)
                record = dict.fromkeys(fieldnames)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment