Skip to content

Instantly share code, notes, and snippets.

@eruffaldi
Created December 14, 2017 11:21
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eruffaldi/8615f97cd81cb782fe142c822b45c0e2 to your computer and use it in GitHub Desktop.
Save eruffaldi/8615f97cd81cb782fe142c822b45c0e2 to your computer and use it in GitHub Desktop.
Typed CSV Reader and Writer
from csv import DictReader,DictWriter
# Originally adapted from pygrametl (TypedCSVSource).
class TypedDictReader(DictReader):
    """Iterate over a CSV file as dicts, type-casting selected columns.

    Rows are produced by csv.DictReader; afterwards every column named in
    ``casts`` is replaced by the result of calling its cast function on the
    raw value read from the file.
    """

    def __init__(self, csvfile, casts, fieldnames=None, restkey=None,
                 restval=None, dialect='excel', *args, **kwds):
        """Arguments:
        - csvfile: An iterable object such as a file. Passed on to
          csv.DictReader
        - casts: A dict (or dict subclass) mapping from attribute names to
          functions to apply to these names, e.g.,
          {'id': int, 'salary': float}
        - fieldnames: Passed on to csv.DictReader
        - restkey: Passed on to csv.DictReader
        - restval: Passed on to csv.DictReader
        - dialect: Passed on to csv.DictReader
        - *args: Passed on to csv.DictReader
        - **kwds: Passed on to csv.DictReader

        Raises:
        - TypeError: if casts is not a dict or one of its values is not
          callable.
        """
        DictReader.__init__(self, csvfile, fieldnames=fieldnames,
                            restkey=restkey, restval=restval,
                            dialect=dialect, *args, **kwds)
        # isinstance (not an exact type comparison) so that dict subclasses
        # such as OrderedDict are accepted as well.
        if not isinstance(casts, dict):
            raise TypeError("The casts argument must be a dict")
        for v in casts.values():
            if not callable(v):
                raise TypeError("The values in casts must be callable")
        self._casts = casts

    def _read_typed_row(self):
        """Fetch the next raw row from DictReader and apply the casts.

        A cast whose attribute name is not a key of the row raises KeyError;
        any exception raised by a cast function propagates unchanged.
        """
        # The base class exposes its iterator step as __next__ on Python 3
        # and as next on Python 2; use whichever one is available.
        fetch = getattr(DictReader, "__next__", None) or DictReader.next
        row = fetch(self)
        for att, func in self._casts.items():
            row[att] = func(row[att])
        return row

    def __next__(self):  # For Python 3
        """Return the next row with the casts applied."""
        return self._read_typed_row()

    def next(self):  # For Python 2
        """Return the next row with the casts applied."""
        return self._read_typed_row()
# Originally adapted from pygrametl (writer counterpart of the typed reader).
class TypedDictWriter(DictWriter):
    """Write dicts to a CSV file, converting selected columns first.

    Before a row is handed to csv.DictWriter, every column named in
    ``casts`` that is present in the row is replaced by the result of
    calling its cast function on the value (e.g. json.dumps to serialise a
    nested structure into a single cell).
    """

    def __init__(self, csvfile, casts, fieldnames, restval="",
                 extrasaction='raise', dialect='excel', *args, **kwds):
        """Arguments:
        - csvfile: The file-like object to write to. Passed on to
          csv.DictWriter
        - casts: A dict (or dict subclass) mapping from attribute names to
          functions applied to the values of these names before writing,
          e.g., {'payload': json.dumps}
        - fieldnames: Passed on to csv.DictWriter
        - restval: Passed on to csv.DictWriter
        - extrasaction: Passed on to csv.DictWriter
        - dialect: Passed on to csv.DictWriter
        - *args: Passed on to csv.DictWriter
        - **kwds: Passed on to csv.DictWriter

        Raises:
        - TypeError: if casts is not a dict or one of its values is not
          callable.
        """
        DictWriter.__init__(self, csvfile, fieldnames=fieldnames,
                            restval=restval, extrasaction=extrasaction,
                            dialect=dialect, *args, **kwds)
        # isinstance (not an exact type comparison) so that dict subclasses
        # such as OrderedDict are accepted as well.
        if not isinstance(casts, dict):
            raise TypeError("The casts argument must be a dict")
        for v in casts.values():
            if not callable(v):
                raise TypeError("The values in casts must be callable")
        self._casts = casts
        # True only while the header line is being written; see writeheader.
        self._inheader = False

    def writeheader(self):
        """Write the header row; the casts are not applied to it."""
        # The base implementation sends the header through self.writerow();
        # the flag keeps the column names from being run through the casts.
        self._inheader = True
        try:
            return DictWriter.writeheader(self)
        finally:
            # Reset even on failure so later rows are still cast.
            self._inheader = False

    def writerow(self, row):
        """Apply the casts to a copy of ``row`` and write it.

        The caller's dict is left untouched, so the same dict can be written
        more than once. Columns named in casts but absent from ``row`` are
        skipped and left to DictWriter's restval handling.
        """
        if not self._inheader:
            converted = dict(row)
            for att, func in self._casts.items():
                if att in converted:
                    converted[att] = func(converted[att])
            row = converted
        return DictWriter.writerow(self, row)

    def writerows(self, rowdicts):
        """Write every row in ``rowdicts`` through writerow().

        Overridden because the inherited writerows() does not go through
        writerow() and would therefore write the values uncast.
        """
        for row in rowdicts:
            self.writerow(row)
def main():
    """Round-trip demo: write typed rows to ``out.csv`` and read them back.

    Writes a header and two rows (the ``complex`` column serialised with
    json.dumps) to ``out.csv`` in the current working directory, then
    re-reads the file, decoding ``complex`` with json.loads and ``number``
    with float, and prints every row.
    """
    import json
    import sys

    # The csv module expects binary files on Python 2 but text files opened
    # with newline="" on Python 3, so choose the open() call accordingly.
    if sys.version_info[0] >= 3:
        def open_csv(path, mode):
            return open(path, mode, newline="")
    else:
        def open_csv(path, mode):
            return open(path, mode + "b")

    # Context managers guarantee both handles are closed, even on error.
    with open_csv("out.csv", "w") as a:
        dw = TypedDictWriter(a, dict(complex=json.dumps),
                             ["number", "text", "complex"])
        dw.writeheader()
        dw.writerow(dict(complex=[1, 2, 3], number=0.5, text="hello"))
        dw.writerow(dict(complex=[4, 5, dict(p=3)], number=10.5, text="hello2"))

    with open_csv("out.csv", "r") as a:
        dr = TypedDictReader(a, dict(complex=json.loads, number=float))
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("reading")
        for x in dr:
            print(x)
# Run the round-trip demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment