Skip to content

Instantly share code, notes, and snippets.

@medigeek
Created July 7, 2021 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save medigeek/326e9485ea67fd7e788ba5298ceb1254 to your computer and use it in GitHub Desktop.
Save medigeek/326e9485ea67fd7e788ba5298ceb1254 to your computer and use it in GitHub Desktop.
Python pandas - split csv file every n rows
#!/usr/bin/python
import pandas as pd
import numpy as np
import csv
#manual approach: write each group of 10000 rows to its own file,
#one statement per group index 0,1,2,3,4,...
#df2[np.arange(len(df2))//10000==0].to_csv('out0.csv', index=False)
#df2[np.arange(len(df2))//10000==1].to_csv('out1.csv', index=False)
#df2[np.arange(len(df2))//10000==2].to_csv('out2.csv', index=False)
#df2[np.arange(len(df2))//10000==3].to_csv('out3.csv', index=False)
#df2[np.arange(len(df2))//10000==4].to_csv('out4.csv', index=False)
#df2[np.arange(len(df2))//10000==5].to_csv('out5.csv', index=False)
def split_csv(in_path='in.csv', out_prefix='out', rows_per_file=5000):
    """Split a CSV file into numbered CSV files of at most N data rows each.

    The input is read in full with ``pd.read_csv`` and written back out as
    ``<out_prefix>0.csv``, ``<out_prefix>1.csv``, ... Every output file
    repeats the header row, omits the DataFrame index and quotes all
    non-numeric fields. An input with no data rows produces no output files.

    No explicit NaN replacement is performed: ``DataFrame.to_csv`` already
    writes missing values as empty fields by default (``na_rep=''``).

    Args:
        in_path: Path of the CSV file to split.
        out_prefix: Prefix (may include a directory) for the output files.
        rows_per_file: Maximum number of data rows per output file.

    Returns:
        The list of output paths that were written, in order.

    Raises:
        ValueError: If ``rows_per_file`` is not a positive integer.
    """
    if rows_per_file <= 0:
        raise ValueError('rows_per_file must be a positive integer')

    frame = pd.read_csv(in_path)
    written = []
    # Step through the frame by position; each slice is one output file.
    # This avoids rebuilding a full-length boolean mask for every chunk.
    for chunk_no, start in enumerate(range(0, len(frame), rows_per_file)):
        out_path = out_prefix + str(chunk_no) + '.csv'
        frame.iloc[start:start + rows_per_file].to_csv(
            out_path,
            index=False,
            quoting=csv.QUOTE_NONNUMERIC,
            header=True,
        )
        written.append(out_path)
    return written


if __name__ == '__main__':
    split_csv()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment