Skip to content

Instantly share code, notes, and snippets.

@mrosata
Created January 20, 2016 13:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrosata/e048e0328ed34f293c22 to your computer and use it in GitHub Desktop.
Save mrosata/e048e0328ed34f293c22 to your computer and use it in GitHub Desktop.
"""
Merge together .csv files. I'm creating this because I have 30 .csv files which need to be
concatenated together. The header row is taken from the first file only; the remaining files,
numbered 1 through n, are appended without their header rows, such as:
"customer-data.csv", "customer-data (1).csv" ... "customer-data (n).csv"
"""
# Todo: make use of the csv module and check headers from each file to make sure data lines up.
# Todo: allow explicit filenames to be passed in as well as lists of files
import csv
class CSV_Monster:
    """Concatenate a numbered series of .csv files into a single output file.

    The inputs are expected to be named "<base_name>.csv", "<base_name> (1).csv" ...
    "<base_name> (<last_i>).csv". The header row of the first file read is kept; the
    first line of every later file is treated as a repeated header and dropped.
    """

    # Path of the input file most recently selected by next() or open().
    current_filename = ""
    # Handle of the input file currently being read; None until the first open().
    file_h = None
    # Number of the next input file; None means the un-numbered base file comes next.
    i = None
    # True while the current input file should keep its first (header) line.
    with_headers = False
    # Flips to True once next() has handed out the file whose header is kept.
    _header_taken = False

    def __init__(self, base_name, last_i, first_i=None, save_name="final-csv-output.csv"):
        """
        Prepare to write the files. Open the save name and wait for run() to be called on the
        utility
        :base_name:str: The common file path and name of csv files to parse
        :last_i:int: The highest integer found in file names. such as base/name-csv-file (30).csv
        :first_i:int|None: Number of the first file to read. None (the default) starts with
            the un-numbered "<base_name>.csv" and then continues with 1 .. last_i.
        :save_name:str: The name of the final output .csv (truncated if it already exists)
        """
        self.base_name = base_name
        self.save_name = save_name
        self.last_i = last_i
        self.i = first_i
        self._header_taken = False
        self.setup()
        # --- Open up the output file for writing
        print("Preparing to write to file: %s" % (self.save_name,))
        self.output_file = open(self.save_name, 'w+')

    def setup(self):
        """Hook called by __init__ before the output file is opened.

        Does nothing by default. It exists so the self.setup() call in __init__ no longer
        fails with AttributeError; subclasses may override it.
        """

    def open(self, filename=None):
        """Open an input file for reading and remember its handle in self.file_h.

        There is currently no way to tell the class to append a file which isn't named
        using the self.base_name convention, other than passing the name in here.
        :filename:str: Pass filename to explicitly open. Default - self.current_filename
        :{return}:file_handler:
        """
        if filename is not None:
            self.current_filename = filename
        # Inside a method body the bare name `open` is the builtin, not this method.
        # Inputs are only ever read, so plain 'r' is enough ('r+' would demand write access).
        self.file_h = open(self.current_filename, 'r')
        print("opening up file: %s" % (self.current_filename,))
        return self.file_h

    def next(self):
        """Select the next input file, open it and return its handle.

        #Todo: next() should be able to consume a list as well.
        :{return}:file_handler: or False once every file up to last_i has been handed out
        """
        # self.i is None before the un-numbered file has been read; guard the comparison
        # so it does not depend on how None orders against integers.
        if self.i is not None and self.i > self.last_i:
            return False
        # Only the very first file handed out keeps its header row.
        self.with_headers = not self._header_taken
        self._header_taken = True
        if self.i is None:
            # The un-numbered base file; numbering continues at 1 afterwards.
            self.current_filename = self.base_name + ".csv"
            self.i = 1
        else:
            self.current_filename = self.base_name + " (" + str(self.i) + ")" + ".csv"
            # increment for the next file
            self.i = self.i + 1
        return self.open()

    def run(self):
        """Roll through each file and append its lines to the output.

        Both the current input file and the output file are closed even if reading or
        writing fails part-way. A missing input file raises the IOError/OSError coming
        from open().
        """
        try:
            while self.next():
                try:
                    # Skip the header line unless this is the file whose header is kept.
                    # The bare `next` is the builtin; the default makes an empty file a no-op.
                    if not self.with_headers:
                        next(self.file_h, None)
                    # Append each line in current file to the output file
                    last_line = None
                    for line in self.file_h:
                        self.output_file.write(line)
                        last_line = line
                    # A file lacking a final newline would otherwise have the next file's
                    # first row glued onto its last row.
                    if last_line is not None and not last_line.endswith("\n"):
                        self.output_file.write("\n")
                finally:
                    self.file_h.close()
            print("Closing the main file... completing operations.")
        finally:
            self.output_file.close()

    def check_headers(self):
        """Placeholder for verifying that every file has the same header row."""
        # TODO: Add header checks
        pass
# Concatenate files/customer-data.csv, files/customer-data (1).csv ... files/customer-data (30).csv
# into the default output file, dropping the repeated header rows of the numbered files.
csv_monster = CSV_Monster(base_name="files/customer-data", last_i=30)
csv_monster.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment