Skip to content

Instantly share code, notes, and snippets.

@geeksunny
Created November 26, 2012 08:18
Show Gist options
  • Save geeksunny/4147171 to your computer and use it in GitHub Desktop.
Save geeksunny/4147171 to your computer and use it in GitHub Desktop.
csv file splitter
import csv
# Configuration
header_line = 1  # 1-based line number of the header row in the input file
limit = 65000  # a new output file is started on every input line number that is a multiple of this
target_file = 'file-to-split.csv'


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on both Python 2 and 3."""
    try:
        # Python 3: newline='' stops the text layer from translating the
        # '\r\n' terminators written by csv.writer (avoids '\r\r\n' on Windows).
        return open(path, mode, newline='')
    except TypeError:
        # Python 2: open() has no ``newline`` keyword; binary mode is the
        # documented equivalent there.
        return open(path, mode + 'b')


def split_csv(target_file, limit, header_line):
    """Split a CSV file into numbered pieces that each repeat the header row.

    Output files are named ``<target_file minus a trailing '.csv'>_split_<n>.csv``
    with ``n`` starting at 1.  Input lines are numbered from 1.  Lines before
    ``header_line`` are skipped.  The row at ``header_line`` is written as the
    first row of every output file.  After the header, a new output file is
    started on every input line whose number is a multiple of ``limit``; that
    line becomes the first data row of the new file.  As a consequence the
    first output file usually holds fewer data rows than the later ones,
    which hold ``limit`` data rows each (the last one may hold fewer).

    Args:
        target_file: Path of the CSV file to split.
        limit: Line-number interval at which a new output file is started
            (must be >= 1).
        header_line: 1-based line number of the header row (must be >= 1).

    Returns:
        The list of output file paths, in creation order.  Empty when the
        input has fewer than ``header_line`` rows.

    Raises:
        ValueError: If ``limit`` or ``header_line`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError('limit must be >= 1, got %r' % (limit,))
    if header_line < 1:
        raise ValueError('header_line must be >= 1, got %r' % (header_line,))

    # Strip only a trailing extension; str.replace() would also remove
    # '.csv' occurring elsewhere in the path.
    if target_file.endswith('.csv'):
        stem = target_file[:-len('.csv')]
    else:
        stem = target_file
    new_file_prefix = stem + '_split_'

    created = []
    header = None
    out_file = None
    writer = None
    with _open_csv(target_file, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        try:
            for iterator, row in enumerate(reader, 1):
                if iterator < header_line:
                    # Nothing can be written before the header is known.
                    continue
                is_header = iterator == header_line
                if is_header:
                    header = row  # Stored so every later file can repeat it
                if is_header or iterator % limit == 0:
                    # Start the first file, or roll over to the next one.
                    if out_file is not None:
                        out_file.close()
                    path = new_file_prefix + str(len(created) + 1) + '.csv'
                    out_file = _open_csv(path, 'w')
                    created.append(path)
                    writer = csv.writer(out_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
                    writer.writerow(header)
                    if is_header:
                        continue
                    print("New file started at line: " + str(iterator))
                writer.writerow(row)
        finally:
            # Always flush and release the last output file, even on error.
            if out_file is not None:
                out_file.close()
    return created


if __name__ == '__main__':
    split_csv(target_file, limit, header_line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment