Last active
May 2, 2017 11:31
-
-
Save chafreaky/8810903 to your computer and use it in GitHub Desktop.
Scans for all zipped files within a directory (recursively), opens each archive and reads the CSV file inside it that shares the archive's base name. Appends all of those CSV files into a single file named 'output.csv'. Useful for creating a single file from thousands of different CSV files. Skips headers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import fnmatch | |
import zipfile | |
import StringIO | |
import csv | |
# Merge the CSV held in every *.zip found beneath this script's directory
# into a single 'output.csv', dropping the first (header) row of each CSV.
#
# Each archive is expected to contain a member named after the archive itself
# with a '.csv' extension (e.g. 'data.zip' -> 'data.csv'); zipfile raises
# KeyError if that member is missing.
#
# NOTE: this is Python 2 code (raw_input, StringIO, csv output opened in
# binary mode), matching the file-level imports.

# Directory containing this script; the scan is rooted here, not at the
# current working directory.
base_dir = os.path.dirname(os.path.abspath(__file__))

print('Scanning for files ...')
# Walk the tree once and remember every archive path, so the number reported
# to the user is exactly the number of archives processed afterwards.
zip_paths = []
for root, dirnames, filenames in os.walk(base_dir):
    for filename in fnmatch.filter(filenames, '*.zip'):
        zip_paths.append(os.path.join(root, filename))
count = len(zip_paths)
print('Found ' + str(count) + ' zipped files')

# Pause until the user confirms; the typed text itself is not needed.
raw_input('Press <Enter> to process all zipped files')
print('Generating single file (might take a while) ...')

progress = 0
# 'output.csv' is created relative to the current working directory.
with open('output.csv', 'wb') as o:
    # One writer is enough for the whole run; it only wraps the output file.
    a = csv.writer(o, delimiter=',', lineterminator='\n')
    for zip_path in zip_paths:
        with open(zip_path, 'rb') as filehandle:
            with zipfile.ZipFile(filehandle) as zfile:
                # Swap only the extension. A plain str.replace() on the last
                # three characters would also rewrite earlier occurrences,
                # e.g. 'zipcodes.zip' -> 'csvcodes.csv'.
                archive_name = os.path.basename(zip_path)
                dataFile = os.path.splitext(archive_name)[0] + '.csv'
                data = StringIO.StringIO(zfile.read(dataFile))
                reader = csv.reader(data)
                # Skip the header row (no error if the CSV is empty).
                next(reader, None)
                progress = progress + 1
                for row in reader:
                    a.writerow(row)
                # Push earlier output off screen so the progress line reads
                # like an in-place counter.
                print('\n' * 100)
                print('Writing file number ' + str(progress) + '/' + str(count) + ' ...')
print('Completed. Enjoy.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment