Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ludovikcoba/66af4ded474ea8673bafbede368c9e79 to your computer and use it in GitHub Desktop.
Save ludovikcoba/66af4ded474ea8673bafbede368c9e79 to your computer and use it in GitHub Desktop.
Yelp dataset conversion for python 3.7
# Python 3.7 fix of https://gist.github.com/paulgb/5265767
# Converts Yelp dataset JSON files (one JSON object per line) to CSV.
import json
import pandas as pd
from glob import glob
import sys
def convert(x):
    """Parse one JSON-encoded record and flatten it into a single-level dict.

    Args:
        x: A string holding one JSON object (for example, one line of a
            line-delimited JSON file).

    Returns:
        A dict in which:
        * list values are joined into one comma-separated string (each
          element is converted with ``str`` first);
        * dict values are expanded one level into ``<key>_<subkey>`` entries;
        * every other value is copied unchanged.

    Raises:
        json.JSONDecodeError: If ``x`` is not valid JSON.
        AttributeError: If the decoded JSON value is not an object.
    """
    ob = json.loads(x)
    new_ob = {}
    for k, v in ob.items():
        if isinstance(v, list):
            # Stringify each element so lists containing numbers, nulls or
            # nested values do not make str.join raise TypeError.
            new_ob[k] = ','.join(map(str, v))
        elif isinstance(v, dict):
            # Only one level of nesting is flattened; deeper dicts are kept
            # as-is under the combined key.
            for kk, vv in v.items():
                new_ob[f'{k}_{kk}'] = vv
        else:
            new_ob[k] = v
    return new_ob
def main(filename):
    """Convert a line-delimited JSON file into a CSV file.

    Every non-blank line of ``filename`` is passed through ``convert`` and
    becomes one row of a pandas DataFrame, which is then written with
    ``DataFrame.to_csv``.

    The output path is ``filename`` with a trailing ``.json`` replaced by
    ``.csv``. If ``filename`` does not end in ``.json``, ``.csv`` is appended
    instead of blindly cutting off the last five characters.

    Args:
        filename: Path of the input file; it is read as UTF-8.
    """
    # The context manager guarantees the handle is closed even when a line
    # fails to parse.
    with open(filename, "r", encoding="utf8") as json_file:
        # Blank lines (e.g. a trailing newline at end of file) are skipped
        # because json.loads rejects empty input.
        df = pd.DataFrame(
            [convert(line) for line in json_file if line.strip()]
        )

    if filename.lower().endswith('.json'):
        base = filename[:-5]
    else:
        base = filename
    csv_filename = '%s.csv' % base
    df.to_csv(csv_filename)
if __name__ == '__main__':
    # sys.argv[0] is always the script name, so the target files start at
    # index 1; fewer than two entries means no file was given.
    if len(sys.argv) < 2:
        sys.exit("Target file is missing.")
    # Convert every file named on the command line, in order.
    for target in sys.argv[1:]:
        main(target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment