Skip to content

Instantly share code, notes, and snippets.

@wblondel
Created December 1, 2023 13:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wblondel/06025367a3333ae13c8f1c83576bcf91 to your computer and use it in GitHub Desktop.
Save wblondel/06025367a3333ae13c8f1c83576bcf91 to your computer and use it in GitHub Desktop.
Split a large CSV file into smaller files, keeping the header in each file
import csv
import argparse
def split_file(input_file_path, entries_per_file, output_prefix, *,
               delimiter=';', encoding='utf-8'):
    """Split a delimited text file into numbered chunks, repeating the header.

    The first row of the input is treated as the header and is written at
    the top of every output file. Output files are named
    ``{output_prefix}_{n}.csv`` with ``n`` starting at 1. An input that
    contains only a header still produces ``{output_prefix}_1.csv`` holding
    that header. A completely empty input produces no output files.

    Args:
        input_file_path: Path of the file to split.
        entries_per_file: Maximum number of data rows (header excluded)
            written to each output file. Must be at least 1.
        output_prefix: Prefix (may include a directory) of the output files.
        delimiter: Field separator used for both reading and writing.
        encoding: Text encoding used for both the input and the outputs.

    Raises:
        ValueError: If ``entries_per_file`` is smaller than 1.
    """
    if entries_per_file < 1:
        raise ValueError("entries_per_file must be a positive integer")

    def open_chunk(index):
        # newline='' lets the csv module control line endings itself, as the
        # csv documentation requires (avoids '\r\r\n' on Windows).
        return open(f"{output_prefix}_{index}.csv", 'w',
                    encoding=encoding, newline='')

    # newline='' on the input keeps line breaks embedded in quoted fields intact.
    with open(input_file_path, 'r', encoding=encoding, newline='') as input_file:
        reader = csv.reader(input_file, delimiter=delimiter, quotechar='"')
        headers = next(reader, None)
        if headers is None:
            # Empty input: there is no header to repeat, so nothing to write.
            return

        file_count = 1
        entry_count = 0
        current_file = open_chunk(file_count)
        try:
            writer = csv.writer(current_file, delimiter=delimiter, quotechar='"')
            writer.writerow(headers)
            for entry in reader:
                if entry_count >= entries_per_file:
                    # Current chunk is full: roll over to the next file.
                    current_file.close()
                    file_count += 1
                    entry_count = 0
                    current_file = open_chunk(file_count)
                    writer = csv.writer(current_file, delimiter=delimiter,
                                        quotechar='"')
                    writer.writerow(headers)
                writer.writerow(entry)
                entry_count += 1
        finally:
            # Closing an already-closed file is a no-op, so this is safe even
            # when opening the next chunk failed right after a rollover.
            current_file.close()
if __name__ == "__main__":
    # Command-line entry point: three positional arguments, forwarded
    # unchanged to split_file().
    cli = argparse.ArgumentParser(
        description="Split a large CSV file into smaller files, keeping the header in each file."
    )
    positional_specs = (
        ('input_file', {'help': 'Input file to split'}),
        ('entries_per_file', {'type': int, 'help': 'Number of entries per output file'}),
        ('output_prefix', {'help': 'Output file prefix'}),
    )
    for arg_name, arg_options in positional_specs:
        cli.add_argument(arg_name, **arg_options)
    parsed = cli.parse_args()
    split_file(parsed.input_file, parsed.entries_per_file, parsed.output_prefix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment