Skip to content

Instantly share code, notes, and snippets.

@duchenpaul
Created April 2, 2019 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save duchenpaul/c93fbe7593a4f1d94dc9965d6cbc038b to your computer and use it in GitHub Desktop.
Save duchenpaul/c93fbe7593a4f1d94dc9965d6cbc038b to your computer and use it in GitHub Desktop.
Split a CSV file into chunks of X lines each, with or without repeating the header
import os
def split(filehandler, delimiter=',', row_limit=1000,
          output_name_template='output_%s.csv', output_path='.', keep_headers=True):
    """Split a CSV stream into numbered files of at most ``row_limit`` data rows.

    Args:
        filehandler: Iterable of text lines handed to ``csv.reader``
            (typically a file opened in text mode).
        delimiter: Field delimiter used for both reading and writing.
        row_limit: Maximum number of data rows per output file. The header
            row, when kept, is not counted against this limit.
        output_name_template: ``%``-style template that receives the 1-based
            piece number and yields the output file name.
        output_path: Directory the pieces are written to. It is not created
            here, so it must already exist.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every output file.

    Raises:
        ValueError: If ``row_limit`` is smaller than 1.

    The first output file is always created, even when the input holds no
    data rows (or is completely empty).
    """
    import csv

    if row_limit < 1:
        raise ValueError('row_limit must be at least 1, got %r' % (row_limit,))

    reader = csv.reader(filehandler, delimiter=delimiter)
    # next(..., None) keeps an empty input from escaping as StopIteration.
    headers = next(reader, None) if keep_headers else None

    def _open_piece(piece_number):
        """Open output file number ``piece_number`` and write the header to it."""
        piece_path = os.path.join(
            output_path,
            output_name_template % piece_number
        )
        piece_file = open(piece_path, 'w', newline='')
        piece_writer = csv.writer(piece_file, delimiter=delimiter)
        if headers is not None:
            piece_writer.writerow(headers)
        return piece_file, piece_writer

    current_piece = 1
    current_file, current_writer = _open_piece(current_piece)
    try:
        rows_in_piece = 0
        for row in reader:
            if rows_in_piece >= row_limit:
                # Current piece is full: close it before starting the next so
                # only one output handle is open at any time.
                current_file.close()
                current_piece += 1
                current_file, current_writer = _open_piece(current_piece)
                rows_in_piece = 0
            current_writer.writerow(row)
            rows_in_piece += 1
    finally:
        # Guarantees the last piece is flushed and closed, even on error.
        current_file.close()
if __name__ == '__main__':
    # Example driver: split one CSV into 100-row pieces inside a folder named
    # after the input file (without its extension).
    file_name = 'snakes_count_10000.csv'
    basename = os.path.splitext(file_name)[0]

    # Create the output folder if it does not exist yet. exist_ok tolerates a
    # pre-existing folder while still surfacing real failures (e.g. permission
    # errors) instead of silently swallowing them.
    os.makedirs(basename, exist_ok=True)

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields containing embedded newlines.
    with open(file_name, newline='') as filehandler:
        split(filehandler, delimiter=',', row_limit=100,
              output_name_template='{}_%s.csv'.format(basename), output_path=basename, keep_headers=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment