Skip to content

Instantly share code, notes, and snippets.

@Faxn
Created December 9, 2013 18:04
Show Gist options
  • Save Faxn/7876963 to your computer and use it in GitHub Desktop.
Save Faxn/7876963 to your computer and use it in GitHub Desktop.
Small script to merge and split CSV files.
import _thread
import argparse
import csv
import itertools
import os
import threading
import time
# TODO: expose these open() settings as command-line options.
# Keyword arguments for open() when reading input files: Windows-1252 text,
# no newline translation, and undecodable bytes raise an error.
in_file_open_args = {'newline': '', 'encoding': 'CP1252', 'errors': 'strict'}
# Keyword arguments for open() when writing output files: UTF-8 text, no
# newline translation, and unencodable characters become backslash escapes.
out_file_open_args = {'newline': '', 'encoding': 'UTF-8', 'errors': 'backslashreplace'}
def merge(infile_names, out_file, open_args=None):
    """Append the rows of several CSV files to one already-open output file.

    Each input is read with the ``excel`` dialect and its rows are written,
    in order, to ``out_file`` using the same dialect.

    Args:
        infile_names: Iterable of paths of the CSV files to read, in order.
        out_file: Writable text file object that receives every row. It is
            neither flushed nor closed here; that stays the caller's job.
        open_args: Optional dict of keyword arguments passed to ``open()``
            for every input file. Defaults to the module-level
            ``in_file_open_args``.
    """
    if open_args is None:
        open_args = in_file_open_args
    outwriter = csv.writer(out_file, dialect='excel')
    for infile_name in infile_names:
        # Distinct names for the path and the handle: the path is no longer
        # rebound to the file object inside its own loop.
        with open(infile_name, **open_args) as infile:
            outwriter.writerows(csv.reader(infile, dialect='excel'))
def split(in_file, out_file_name_base, out_file_rows, open_args=None):
    """Distribute the CSV rows of ``in_file`` over numbered output files.

    Output files are named ``<out_file_name_base><n>.csv`` with ``n``
    counting up from 1, and each holds at most ``out_file_rows`` rows. A file
    is only created once there is at least one row to put in it, so an empty
    input produces no files and no empty trailing file is left behind when
    the row count is an exact multiple of ``out_file_rows``.

    Args:
        in_file: Iterable of text lines (typically an open text file) parsed
            with ``csv.reader``.
        out_file_name_base: Path prefix for the generated files.
        out_file_rows: Maximum number of rows per output file; must be >= 1.
        open_args: Optional dict of keyword arguments passed to ``open()``
            for every output file. Defaults to the module-level
            ``out_file_open_args``.

    Raises:
        ValueError: If ``out_file_rows`` is less than 1 (a non-positive size
            could never consume the input).
    """
    if out_file_rows < 1:
        raise ValueError("out_file_rows must be a positive integer")
    if open_args is None:
        open_args = out_file_open_args
    csv_reader = csv.reader(in_file)
    for file_number in itertools.count(1):
        # Fetch one row before opening anything, so a file is never created
        # without content. A blank line parses to [], hence the None test.
        first_row = next(csv_reader, None)
        if first_row is None:
            break
        out_name = out_file_name_base + str(file_number) + '.csv'
        with open(out_name, 'w', **open_args) as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(first_row)
            # Stream the rest of this chunk without holding it in memory.
            csv_writer.writerows(itertools.islice(csv_reader, out_file_rows - 1))
# Command-line entry point: merge the input files into one CSV, or, when
# --split is given, redistribute the merged rows over numbered output files.
parser = argparse.ArgumentParser()
parser.add_argument('infile', nargs='+', help="input csv file or files")
parser.add_argument('outfile', help="output csv file or name base if using split.")
parser.add_argument('-s', '--split', help="Split output into files with this many rows.", type=int)
args = parser.parse_args()
print(args)
if args.split:
    # merge() streams its rows into a pipe from a background thread while
    # split() consumes the other end in this thread.
    read_fd, write_fd = os.pipe()
    # Explicit UTF-8 and newline='' so the rows cross the pipe unchanged,
    # independent of the locale encoding and of newline translation.
    pipe_open_args = dict(newline='', encoding='utf-8')

    def _feed_pipe():
        # Leaving the with-block flushes and closes the write end, which is
        # what lets the reader see end-of-file.
        with os.fdopen(write_fd, 'w', **pipe_open_args) as pipe_out:
            merge(args.infile, pipe_out)

    feeder = threading.Thread(target=_feed_pipe)
    feeder.start()
    with os.fdopen(read_fd, 'r', **pipe_open_args) as pipe_in:
        split(pipe_in, args.outfile, args.split)
    # Wait for the producer instead of sleeping for a fixed interval.
    feeder.join()
else:
    # Append mode: rows are added after any existing content of outfile.
    with open(args.outfile, 'a', **out_file_open_args) as out_file:
        merge(args.infile, out_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment