Last active
September 9, 2017 17:35
-
-
Save devvyn/759f08e3d83e4cf8f7ef to your computer and use it in GitHub Desktop.
Super easy CSV file reader class with column accessors (magic __getattr__ for named columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
"""
Convenience wrapper for csv.DictReader showing use of __getattr__ to do magic lookups on the loaded file.

To procedurally implement CsvFileModel, without a new class, pass the target package_name and/or filename to the constructor.

For example:

    my_spending_report = CsvFileModel(filename='spendnov1995.tsv', package_name='my_project.data')
    my_spending_report = CsvFileModel(filename='spendnov1995.tsv')

To sub-class the reader for a particular file, create an empty class that inherits from CsvFileModel, and your new
class will automatically look for a file with a name based on your class name.

For example:

    class MySpendingReport(CsvFileModel):  # reads from my_spending_report.csv
        pass

would assume it's supposed to read a file in the base package's folder called "my_spending_report.csv". (Whichever
package serves as your entry point for running your application is typically treated as the base package).

You may wish to override the constructor on your class in order to pass a different path, package, or filename.
For example, if your data file is always in a sub-folder named "data", you may want to use or inherit from a sub-class
that might look like this:

    class MyData(CsvFileModel):
        def __init__(self, filename=None, package_name=None):
            super(MyData, self).__init__(filename, package_name, 'data')

and then call it or sub-class it like so:

    my_report_data = MyData(filename='spenddec1994.csv')  # reads from data/spenddec1994.csv

    class MyArchivalSpendingReport(MyData):
        def __init__(self, filename, package_name='my_project.archives'):
            super(MyArchivalSpendingReport, self).__init__(filename, package_name)

    my_report_data = MyArchivalSpendingReport('spendnov1982.csv')  # reads from my_project.archives:data/spendnov1982.csv
"""
import csv
import os.path
import sys

import pkg_resources
from inflection import underscore

__author__ = 'Devvyn Murphy'
class CsvFileModel(object):
    """
    Load a CSV package resource with csv.DictReader and expose its columns by name.

    The whole file is read when the instance is constructed. Afterwards:

    * ``data`` is a list with one dict per row, keyed by column header;
    * ``columns`` is the list of header names reported by the reader
      (an empty list when the file has no header row);
    * ``rows`` is a list holding each row's values as a list.

    Example:

        model = CsvFileModel(filename='examples.csv', package_name='my_project.data')
        print(model['colour'])     # list of the entire "colour" column
        print(model.colour)        # same as above
        print(model[4]['colour'])  # "colour" value of the row at index 4
        print(model.colour[4])     # same as above
        print(model['colour'][4])  # same

    Column names are matched exactly first, then case-insensitively with
    spaces treated as underscores, so a header such as "Unit Price" is also
    reachable as ``model.unit_price``.

    :param filename: name of the CSV file. When omitted, a sub-class uses its
        own class name, converted with ``inflection.underscore``, plus ".csv".
    :param package_name: package containing the file. Defaults to
        ``self.__module__``.
    :param path: optional '/'-separated sub-folder inside the package.
    :raises ValueError: if ``filename`` is omitted on CsvFileModel itself.
    """

    # Attributes assigned by __init__. __getattr__ must never try to resolve
    # these as column names, otherwise a missing one causes endless recursion.
    _OWN_ATTRIBUTES = ('data', 'columns', 'rows')

    def __init__(self, filename=None, package_name=None, path=None):
        super(CsvFileModel, self).__init__()
        if filename is None:
            if self.__class__.__name__ == 'CsvFileModel':
                raise ValueError(
                    'Missing filename argument in constructor. '
                    'You may want to inherit from this class so your new class name determines the filename.')
            filename = '{}.csv'.format(underscore(self.__class__.__name__))
        if package_name is None:
            package_name = self.__module__
        # pkg_resources resource names are always '/'-separated, regardless of
        # the platform, so they must not be built with os.path.join.
        if path:
            file_path = '{}/{}'.format(path.rstrip('/'), filename)
        else:
            file_path = filename
        resource_filename = pkg_resources.resource_filename(package_name, file_path)
        with self._open_csv(resource_filename) as csv_file:
            reader = csv.DictReader(csv_file)
            self.data = list(reader)
            # fieldnames is None when the file is empty; normalise to a list
            # so later column lookups can iterate it safely.
            self.columns = reader.fieldnames or []
        # Materialise the values so rows are real lists on Python 3 as well.
        self.rows = [list(row.values()) for row in self.data]

    @staticmethod
    def _open_csv(resource_filename):
        """Open the file in the mode the csv module expects on the running Python version."""
        if sys.version_info[0] < 3:
            # Python 2's csv module works on byte strings.
            return open(resource_filename, 'rb')
        # Python 3's csv module needs text, with newline translation disabled.
        return open(resource_filename, 'r', newline='')

    def _resolve_column(self, name):
        """Return the actual header matching ``name``, or None when there is no match."""
        columns = self.columns
        if name in columns:
            return name
        wanted = str(name).lower().replace(' ', '_')
        for column in columns:
            if str(column).lower().replace(' ', '_') == wanted:
                return column
        return None

    def __getattr__(self, name):
        """
        Return the named column as a list with one value per row.

        Only called when normal attribute lookup fails.

        :param name: column header, or its lower-case/underscored form.
        :return: the column's values in row order.
        :rtype: list
        :raises AttributeError: if no column matches ``name``.
        """
        # If the instance's own attributes or a dunder are missing (e.g.
        # __init__ did not finish, or copy/pickle is probing the object),
        # fail immediately instead of recursing through self.columns.
        if name in self._OWN_ATTRIBUTES or name.startswith('__'):
            raise AttributeError(name)
        column = self._resolve_column(name)
        if column is None:
            raise AttributeError(name)
        # Index with the real header, not a guess rebuilt from ``name``.
        return [row[column] for row in self.data]

    def __getitem__(self, index):
        """
        Return a row dict for an integer, a list of row dicts for a slice, or a column list for a name.

        :raises IndexError: if an integer index is out of range.
        :raises AttributeError: if a name matches no column.
        """
        if isinstance(index, (int, slice)):
            return self.data[index]
        return self.__getattr__(index)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If I were to do this again, I'd change just about everything. I'm not crazy about coupling file names of data sets to class names in code, but it's an interesting bit of sugar. The code could be more readable, and it would be prudent to use generators and probably async I/O.