Skip to content

Instantly share code, notes, and snippets.

@jacquarg
Last active December 24, 2015 06:59
Show Gist options
  • Select an option

  • Save jacquarg/6760429 to your computer and use it in GitHub Desktop.

Select an option

Save jacquarg/6760429 to your computer and use it in GitHub Desktop.
CSV with Unicode support for Python 2.7. Reader from http://stackoverflow.com/a/6187936/1414450; writer from http://stackoverflow.com/a/5838817/1414450, with a small add-on.
import csv
import cStringIO
import codecs
class UnicodeCsvReader(object):
"From http://stackoverflow.com/a/6187936/1414450."
def __init__(self, f, encoding="utf-8", **kwargs):
self.csv_reader = csv.reader(f, **kwargs)
self.encoding = encoding
def __iter__(self):
return self
def next(self):
# read and split the csv row into fields
row = self.csv_reader.next()
# now decode
return [unicode(cell, self.encoding) for cell in row]
@property
def line_num(self):
return self.csv_reader.line_num
class UnicodeDictReader(csv.DictReader):
    """``csv.DictReader`` whose underlying row reader decodes to unicode.

    Parameters:
        f: iterable of encoded lines, passed to the csv machinery.
        encoding: codec used to decode each cell (default ``"utf-8"``).
        fieldnames: optional explicit field names, as for ``csv.DictReader``.
        **kwds: remaining ``csv.DictReader`` options (``restkey``,
            ``restval``, ``dialect``) and csv formatting parameters.
    """

    # Options consumed by csv.DictReader itself; csv.reader rejects them.
    _DICT_ONLY_OPTIONS = ("restkey", "restval")

    def __init__(self, f, encoding="utf-8", fieldnames=None, **kwds):
        # Explicit base call: the class is written for Python 2.7, where
        # csv.DictReader is an old-style class and super() is unavailable.
        csv.DictReader.__init__(self, f, fieldnames=fieldnames, **kwds)
        # Forward only what csv.reader understands; passing restkey/restval
        # through would make csv.reader raise TypeError.
        reader_kwds = {
            key: value
            for key, value in kwds.items()
            if key not in self._DICT_ONLY_OPTIONS
        }
        # Swap the byte-oriented reader created by the base class for the
        # decoding one, so rows and header come back as unicode.
        self.reader = UnicodeCsvReader(f, encoding=encoding, **reader_kwds)
class DictUnicodeWriter(object):
"From : http://stackoverflow.com/a/5838817/1414450 . Enhanced."
def __init__(self, f, fieldnames, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.DictWriter(self.queue, fieldnames, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, D):
self.writer.writerow({k:v.encode("utf-8") for k,v in D.items()})
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for D in rows:
self.writerow(D)
def writeheader(self):
self.writer.writeheader()
# Added GJ
def truncateNwriterow(self, D) :
self.writerow({ k: v for k, v in D.items() if k in self.writer.fieldnames})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment