Skip to content

Instantly share code, notes, and snippets.

@slhck
Created June 22, 2022 08:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slhck/3122d45b40494de05feef77b6e3e0808 to your computer and use it in GitHub Desktop.
Save slhck/3122d45b40494de05feef77b6e3e0808 to your computer and use it in GitHub Desktop.
Convert CSV to NDJSON (LDJSON)
#!/usr/bin/env python3
#
# csv_to_ndjson.py
#
# Author: Werner Robitza
#
# Convert CSV files to .ndjson.
# This assumes that the same keys are used in every line of the input file.
# It works on a line-by-line basis, so it should be fast and memory-efficient.
import json
import argparse
import csv
from tqdm import tqdm
def convert_to_ndjson(csv_file, ndjson_file):
    """Convert a CSV file into a newline-delimited JSON (NDJSON) file.

    The CSV is read with ``csv.DictReader``, so the first row supplies the
    keys and every following row becomes one JSON object written on its own
    line. Rows are processed one at a time; the whole file is never held in
    memory. Progress is reported through ``tqdm``.

    Args:
        csv_file: Path of the CSV file to read.
        ndjson_file: Path of the NDJSON file to write. It is opened in
            ``"w"`` mode, so an existing file is overwritten.

    Raises:
        OSError: If either file cannot be opened.
        csv.Error: If the CSV parser rejects the input.
    """
    # newline="" is what the csv module requires for its input file: without
    # it, universal-newline translation rewrites "\r\n" sequences embedded in
    # quoted fields to "\n" before the parser sees them, altering the data.
    with open(csv_file, "r", newline="") as in_f, open(ndjson_file, "w") as out_f:
        csv_reader = csv.DictReader(in_f)
        for row in tqdm(csv_reader):
            out_f.write(json.dumps(row) + "\n")
def main():
    """Parse the command line and run the CSV-to-NDJSON conversion.

    Expects two positional arguments: the input CSV path and the output
    NDJSON path, which are forwarded to ``convert_to_ndjson``.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Positional arguments, registered in the order they appear on the CLI.
    positionals = (
        ("input", "input .csv"),
        ("output", "output .ndjson"),
    )
    for arg_name, arg_help in positionals:
        arg_parser.add_argument(arg_name, help=arg_help)

    parsed = arg_parser.parse_args()
    convert_to_ndjson(parsed.input, parsed.output)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment