Skip to content

Instantly share code, notes, and snippets.

@davidlukac
Last active October 27, 2017 15:49
Show Gist options
  • Save davidlukac/b1e76141c88727b155b5032d5f09f5df to your computer and use it in GitHub Desktop.
Save davidlukac/b1e76141c88727b155b5032d5f09f5df to your computer and use it in GitHub Desktop.
Joining CSVs the rough way
from modules.modules import CsvRepository, FileResource
def read_and_write(source_file: str, f, rm_header: bool):
    """Append the lines of ``source_file`` to the already-open writer ``f``.

    :param source_file: Path of the text file to read.
    :param f: Writable text file object; every copied line is passed to
        ``f.write``. It is neither opened nor closed here.
    :param rm_header: When true, the first line of ``source_file`` is
        skipped instead of being copied.

    If the last copied line does not end with a newline, one is written
    after it, so that whatever is appended to ``f`` next starts on its own
    row instead of being glued to this file's final row.
    """
    last_line = ""
    with open(source_file) as f_in:
        if rm_header:
            # Drop the header row; the default keeps an empty file from
            # raising StopIteration.
            next(f_in, None)
        for line in f_in:
            f.write(line)
            last_line = line
    # Terminate an unterminated final row (only if something was written).
    if last_line and not last_line.endswith("\n"):
        f.write("\n")
if __name__ == '__main__':
    # Resolve the input files matching the pattern and report what was found.
    repo = CsvRepository(FileResource.get_resource_filename('data_Q2_2017/2017-*.csv'))
    print(repo.matching_files)
    print(len(repo.matching_files))

    # Concatenate all inputs into a single output file.
    with open(FileResource.get_resource_filename('data_Q2_2017/out.csv'), 'w') as f:
        for i, in_file in enumerate(repo.matching_files):
            # Only the first file keeps its header line; it is skipped for
            # every later file.
            remove_header = i != 0
            read_and_write(in_file, f, remove_header)
import glob
from typing import List
class CsvRepository(object):
    """Resolve a glob pattern to the list of matching file paths on demand.

    The pattern is expanded on first access of ``matching_files`` and the
    result is cached on the instance for later accesses.
    """

    def __init__(self, path_pattern: str, recursive: bool = True):
        """Store the lookup settings; no filesystem access happens here.

        :param path_pattern: Glob pattern passed to ``glob.glob``.
        :param recursive: Forwarded as ``glob.glob``'s ``recursive`` flag.
        """
        self._path_pattern = path_pattern
        self._recursive = recursive
        # None means "not resolved yet"; filled in by ``matching_files``.
        self._matching_files = None

    @property
    def matching_files(self) -> List[str]:
        """Return the paths matching the pattern, sorted ascending.

        ``glob.glob`` does not guarantee any ordering, so the matches are
        sorted to give callers the same sequence on every run and platform.
        The list is computed once and the same list object is returned on
        every later access.
        """
        if self._matching_files is None:
            self._matching_files = sorted(
                glob.glob(self._path_pattern, recursive=self._recursive)
            )
        return self._matching_files
class FileResource(object):
    """Static helpers that turn a resource-relative name into a filename.

    NOTE(review): ``resource_filename`` is not imported in the visible part
    of this file - presumably ``pkg_resources.resource_filename``; confirm.
    """

    @staticmethod
    def get_resource_filename(filename: str) -> str:
        """Look up ``filename`` relative to the module that defines ``Resources``."""
        package = Resources.__module__
        return resource_filename(package, filename)

    @staticmethod
    def get_test_resource_filename(filename: str) -> str:
        """Look up ``filename`` relative to the module that defines ``TestResources``.

        NOTE(review): ``TestResources`` is not defined in the visible part
        of this file - verify it is available where this is called.
        """
        package = TestResources.__module__
        return resource_filename(package, filename)
class Resources(object):
    """Empty marker class; only its ``__module__`` is read (by ``FileResource``)."""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment