Skip to content

Instantly share code, notes, and snippets.

@2minchul
Created September 5, 2021 12:27
Show Gist options
  • Save 2minchul/f676a12015e95fb3c4e2e17f30ec7862 to your computer and use it in GitHub Desktop.
Save 2minchul/f676a12015e95fb3c4e2e17f30ec7862 to your computer and use it in GitHub Desktop.
Convert NDJSON to CSV. Running under PyPy3 is recommended.
import csv
import json
import sys
import os.path
import tqdm
def main() -> int:
    """Convert a newline-delimited JSON (NDJSON) file to CSV.

    Command line: ``ndjson_to_csv.py input_file [output_file]``. When the
    output file is omitted, the input path with its extension replaced by
    ``.csv`` is used.

    The first JSON object found in the input defines the CSV columns, in
    key order. Every later record is written in that column order: keys a
    record lacks become empty cells, and keys that are not in the header
    are dropped. Blank lines are ignored. Lines that are not valid JSON,
    or whose JSON value is not an object, are skipped and counted; the
    count is reported on stderr once the conversion finishes. An input
    with no usable records produces an empty output file.

    Returns:
        0 after a conversion, 1 when the wrong number of command-line
        arguments was given.
    """
    if len(sys.argv) == 2:
        in_filename = sys.argv[1]
        out_filename = os.path.splitext(in_filename)[0] + '.csv'
    elif len(sys.argv) == 3:
        in_filename = sys.argv[1]
        out_filename = sys.argv[2]
    else:
        print('ndjson_to_csv.py input_file [output_file]')
        return 1

    print(in_filename, '->', out_filename)

    skipped = 0
    # JSON text is UTF-8, so decode the input explicitly instead of relying
    # on the locale default. The output keeps the platform default encoding,
    # as before.
    with open(in_filename, encoding='utf-8') as in_file, \
            open(out_filename, 'w', newline='') as out_file, \
            tqdm.tqdm() as bar:
        csv_writer = None
        for line in in_file:
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Skip the line entirely; falling through would write the
                # previous record a second time.
                skipped += 1
                continue
            if not isinstance(record, dict):
                # Only JSON objects can be mapped onto named columns.
                skipped += 1
                continue
            if csv_writer is None:
                # The first usable record fixes the header.
                csv_writer = csv.DictWriter(
                    out_file,
                    fieldnames=list(record),
                    restval='',
                    extrasaction='ignore',
                )
                csv_writer.writeheader()
            csv_writer.writerow(record)
            bar.update(1)

    if skipped:
        print(f'skipped {skipped} malformed line(s)', file=sys.stderr)
    return 0
# Script entry point: run the converter and hand its return code to the
# interpreter as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    raise SystemExit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment