Skip to content

Instantly share code, notes, and snippets.

@nrrb
Forked from paulgb/convert.py
Last active May 16, 2017 18:47
Show Gist options
  • Save nrrb/ef4d359504c6ed8eb612e50a2d22a6e3 to your computer and use it in GitHub Desktop.
Save nrrb/ef4d359504c6ed8eb612e50a2d22a6e3 to your computer and use it in GitHub Desktop.
Convert the Yelp Academic dataset from JSON to CSV files with Pandas.
'''
Convert Yelp Academic Dataset from JSON to CSV
Requires Pandas (https://pypi.python.org/pypi/pandas)
By Paul Butler, No Rights Reserved
'''
import io
import json
from glob import glob

import pandas as pd
def convert(x):
    '''Convert a JSON string to a flat Python dictionary
    which can be passed into Pandas.

    The string is parsed with ``json.loads`` and its top-level entries are
    flattened one level:

    * a list value becomes a single comma-separated string;
    * a dict value is replaced by one ``<key>_<subkey>`` entry per item
      (the sub-values themselves are kept unchanged, not flattened further);
    * any other value is copied through as-is.

    Args:
        x: A string holding one JSON object.

    Returns:
        A new dict containing the flattened entries.

    Raises:
        ValueError: If ``x`` is not valid JSON (raised by ``json.loads``).
    '''
    ob = json.loads(x)
    # Collect results in a separate dict: adding/removing keys of ``ob``
    # while iterating over it raises RuntimeError on Python 3.
    flat = {}
    for k, v in ob.items():
        if isinstance(v, list):
            # '%s' formatting stringifies non-string items (e.g. numbers)
            # so the join cannot fail with TypeError; text items pass
            # through unchanged.
            flat[k] = ','.join('%s' % item for item in v)
        elif isinstance(v, dict):
            for kk, vv in v.items():
                flat['%s_%s' % (k, kk)] = vv
        else:
            flat[k] = v
    return flat
# Convert every ``*.json`` file in the current working directory into a CSV
# file with the same base name. Each input file is read as one JSON object
# per line.
for json_filename in glob('*.json'):
    csv_filename = '%s.csv' % json_filename[:-len('.json')]
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('Converting %s to %s' % (json_filename, csv_filename))
    # io.open accepts ``encoding`` on both Python 2 and 3, and the ``with``
    # block guarantees the file handle is closed after reading.
    # NOTE(review): assumes the input files are UTF-8 encoded - confirm.
    with io.open(json_filename, encoding='utf-8') as json_file:
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # would otherwise make json.loads fail.
        records = [convert(line) for line in json_file if line.strip()]
    df = pd.DataFrame(records)
    df.to_csv(csv_filename, encoding='utf-8', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment