Skip to content

Instantly share code, notes, and snippets.

@chafreaky
Last active May 2, 2017 11:31
Show Gist options
  • Save chafreaky/8810903 to your computer and use it in GitHub Desktop.
Save chafreaky/8810903 to your computer and use it in GitHub Desktop.
Recursively scans the script's own directory for zip archives, opens each archive, and reads the CSV file inside it (the member is expected to have the same base name as the archive). Appends the rows of all those CSV files to a single file named 'output.csv'. Useful for creating a single file from thousands of separate CSV files. The header row of every CSV file is skipped.
import os
import fnmatch
import zipfile
import StringIO
import csv
# Merge the CSV member of every zip archive found below this script's
# directory into a single 'output.csv' (written to the current working
# directory). The first row of every CSV member is treated as a header and
# dropped, so the output contains data rows only.
#
# NOTE: this script targets Python 2 (StringIO module, raw_input, binary-mode
# csv output).

print('Scanning for files ...')

# Walk the tree once and remember every archive path, so the number reported
# to the user and the set of archives actually processed cannot disagree.
script_dir = os.path.dirname(os.path.abspath(__file__))
zip_paths = []
for root, dirnames, filenames in os.walk(script_dir):
    for filename in fnmatch.filter(filenames, '*.zip'):
        zip_paths.append(os.path.join(root, filename))
count = len(zip_paths)

print('Found '+str(count) +' zipped files')
raw_input('Press <Enter> to process all zipped files')
print('Generating single file (might take a while) ...')

progress = 0
skipped = []  # (archive path, reason) for archives that could not be read
with open('output.csv', 'wb') as o:
    # One writer is enough for the whole run; it only wraps the output file.
    a = csv.writer(o, delimiter=',', lineterminator='\n')
    for progress, zip_path in enumerate(zip_paths, 1):
        # The CSV member is expected to be named like the archive itself.
        # Swap only the trailing extension: str.replace() would also rewrite
        # any earlier occurrence of the extension text inside the name
        # (e.g. 'zipcodes.zip' -> 'csvcodes.csv').
        zip_name = os.path.basename(zip_path)
        dataFile = zip_name[:-3] + 'csv'
        try:
            with open(zip_path, 'rb') as filehandle:
                with zipfile.ZipFile(filehandle) as zfile:
                    data = StringIO.StringIO(zfile.read(dataFile))
        except (KeyError, zipfile.BadZipfile) as err:
            # KeyError: the archive has no member with the expected name.
            # BadZipfile: the file is not a readable zip archive.
            # Nothing has been written for this archive yet, so skipping it
            # keeps the output consistent instead of aborting the whole run.
            skipped.append((zip_path, err))
            continue
        reader = csv.reader(data)
        next(reader, None)  # drop the header row (no-op for an empty file)
        a.writerows(reader)
        # Push previous output off screen, then show the progress line.
        print('\n' * 100)
        print('Writing file number '+ str(progress) +'/'+str(count)+' ...')

if skipped:
    print('Skipped ' + str(len(skipped)) + ' archive(s) that could not be read:')
    for zip_path, reason in skipped:
        print('  ' + zip_path + ': ' + str(reason))
print('Completed. Enjoy.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment