# BHEADRICK/splitcsv.py

## splitcsv.py
import os

import pandas as pd

# csv file name to be read in
in_csv = 'filename.csv'

# number of data rows to write to each output file;
# you can change the row size according to your need
rowsize = 20000


def split_csv(csv_path, rows_per_file=20000, out_file_prefix=None):
    """Split a CSV file into numbered part files that each repeat the header.

    The input is streamed once in chunks of ``rows_per_file`` data rows, so
    the whole file is never held in memory and is not re-read per part.
    Part ``k`` is written to ``<out_file_prefix>-part-<k>.csv`` (k starts
    at 1) without the pandas index column.

    Args:
        csv_path: Path of the CSV file to split. Its first row is treated
            as the header.
        rows_per_file: Maximum number of data rows per output file. Must
            be at least 1.
        out_file_prefix: Prefix for the output file names. Defaults to
            ``csv_path`` with its extension removed.

    Returns:
        The list of output file paths, in the order they were written.
        Empty when the input contains a header but no data rows.

    Raises:
        ValueError: If ``rows_per_file`` is smaller than 1.
    """
    if rows_per_file < 1:
        raise ValueError('rows_per_file must be at least 1')

    if out_file_prefix is None:
        # splitext strips only the extension; str.split() would split on
        # whitespace and leave ".csv" inside the prefix.
        out_file_prefix = os.path.splitext(csv_path)[0]

    written = []
    # With chunksize set, read_csv returns an iterator of DataFrames.
    reader = pd.read_csv(csv_path, chunksize=rows_per_file)
    try:
        for chunk in reader:
            if chunk.empty:
                # Header-only input can surface as one empty chunk; writing
                # a part file that holds nothing but the header is useless.
                continue
            out_csv = '{}-part-{}.csv'.format(out_file_prefix,
                                              len(written) + 1)
            # mode='w' so that re-running the split replaces earlier parts
            # instead of appending a second header and duplicate rows.
            chunk.to_csv(out_csv, index=False, mode='w')
            written.append(out_csv)
    finally:
        # Release the input file handle even if a write fails midway.
        reader.close()
    return written


if __name__ == '__main__':
    split_csv(in_csv, rowsize)