Created
January 20, 2016 13:50
-
-
Save mrosata/e048e0328ed34f293c22 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Merge together .csv files. I'm creating this because I have 30 .csv files which need to be
concatenated together. This uses the header row from the first file and then concatenates
files 1 through n without their headers, such as:
"customer-data.csv", "customer-data (1).csv" ... "customer-data (n).csv"
"""
# Todo: make use of the csv module and check headers from each file to make sure data lines up. | |
# Todo: allow explicit filenames to be passed in as well as lists of files | |
import csv | |
class CSV_Monster(object):
    """Concatenate a numbered series of .csv files into a single output file.

    Input files are named "<base_name>.csv", "<base_name> (1).csv" ...
    "<base_name> (<last_i>).csv". The header row (first line) of the first
    file processed is kept; the first line of every later file is skipped.
    """
    current_filename = ""
    file_h = None
    i = None
    with_headers = False
    # True once the file that supplies the header row has been selected.
    _header_source_seen = False

    def __init__(self, base_name, last_i, first_i=None, save_name="final-csv-output.csv"):
        """
        Prepare to write the files. Open the save name and wait for run() to be called on the
        utility
        :base_name:str: The common file path and name of csv files to parse
        :last_i:int: The highest integer found in file names. such as base/name-csv-file (30).csv
        :first_i:int|None: Number of the first file to read. None (default) starts with the
                           un-numbered "<base_name>.csv" and continues with " (1)".
        :save_name:str: The base name of the final output .csv
        """
        self.base_name = base_name
        self.save_name = save_name
        self.last_i = last_i
        self.i = first_i
        self.setup()
        # --- Open up the output file for writing
        print("Preparing to write to file: %s" % (self.save_name,))
        # `open` here is the builtin: class-level names (the open() method
        # below) are not visible as bare names inside method bodies.
        self.output_file = open(self.save_name, 'w+')

    def setup(self):
        """Reset the per-run reading state. Called by __init__ before the output is opened."""
        self.current_filename = ""
        self.file_h = None
        self.with_headers = False
        self._header_source_seen = False

    def open(self, filename=None):
        """Open the self.current_filename for reading.
        :filename:str: Pass filename to explicitly open. Default - self.current_filename
        :{return}:file_handler: the opened input file, also stored on self.file_h
        """
        if filename is not None:
            self.current_filename = filename
        # Read-only is enough; 'r+' would fail on files we may not write to.
        self.file_h = open(self.current_filename, 'r')
        print("opening up file: %s" % (self.current_filename,))
        return self.file_h

    def next(self):
        """Setup the next file to be read then return file handler by calling self.open()
        #Todo: next() should be able to consume a list as well.
        :{return}:file_handler|False: False once every file up to last_i has been consumed
        """
        # Test for None first: None cannot be ordered against an int on Python 3.
        if self.i is not None and self.i > self.last_i:
            return False
        self.current_filename = self.base_name
        if self.i is None:
            # The un-numbered file comes first; numbering then continues at 1.
            self.i = 0
        else:
            self.current_filename += " (" + str(self.i) + ")"
        # Only the first file processed contributes its header row, so the
        # output has a header even when an explicit first_i is given.
        self.with_headers = not self._header_source_seen
        self._header_source_seen = True
        # complete the filename for the current file
        self.current_filename += ".csv"
        # increment for the next file
        self.i = self.i + 1
        return self.open()

    def run(self):
        """Roll through each file and append lines to the output.

        The output file is closed when this returns or raises; errors opening
        or reading an input file propagate to the caller.
        """
        try:
            while self.next():
                try:
                    # should skip the headers unless self.with_headers is True
                    if not self.with_headers:
                        # The default keeps an empty file from raising StopIteration.
                        next(self.file_h, None)
                    # Append each line in current file to the output file
                    last_line = None
                    for line in self.file_h:
                        self.output_file.write(line)
                        last_line = line
                    # A file without a trailing newline would otherwise have
                    # its last row fused with the next file's first row.
                    if last_line is not None and not last_line.endswith("\n"):
                        self.output_file.write("\n")
                finally:
                    self.file_h.close()
            print("Closing the main file... completing operations.")
        finally:
            self.output_file.close()

    def check_headers(self):
        """Placeholder for validating that every file's header matches. Currently does nothing."""
        # TODO: Add header checks
        pass
# Concatenate files/customer-data.csv, files/customer-data (1).csv ... files/customer-data (30).csv
# into the default output file; only the first file's header row is kept.
csv_monster = CSV_Monster(base_name="files/customer-data", last_i=30)
csv_monster.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment