Skip to content

Instantly share code, notes, and snippets.

@gsf
Forked from anarchivist/gist:371235
Created May 13, 2010 20:24
Show Gist options
  • Save gsf/400413 to your computer and use it in GitHub Desktop.
Save gsf/400413 to your computer and use it in GitHub Desktop.
class RowDict(dict):
    """Dict whose ``get`` returns a UTF-8 encoded, pipe-joined byte string.

    Encoding to UTF-8 is necessary for Python 2's csv library because it
    can't handle unicode.  ``get`` always returns the byte-string type
    (``str`` on Python 2, ``bytes`` on Python 3):

    * a missing key or any falsy value yields an empty byte string;
    * a non-string iterable is joined with ``|`` after dropping falsy
      elements;
    * byte strings are interpreted as UTF-8, with undecodable bytes
      replaced by U+FFFD rather than raising.

    >>> row = RowDict()
    >>> row['bob'] = [u'Montalb\\xe1n, Ricardo', u'Roddenberry, Gene']
    >>> row.get('bob') == b'Montalb\\xc3\\xa1n, Ricardo|Roddenberry, Gene'
    True
    >>> row.get('missing') == b''
    True
    """

    # The text type: ``unicode`` on Python 2, ``str`` on Python 3.
    _text_type = type(u'')

    @classmethod
    def _to_text(cls, item):
        """Return *item* as text, decoding byte strings as UTF-8."""
        if isinstance(item, cls._text_type):
            return item
        if isinstance(item, bytes):
            # 'replace' keeps a single bad byte from aborting a whole row.
            return item.decode('utf8', 'replace')
        return cls._text_type(item)

    def get(self, key, *args):
        """Return the value for *key* as a UTF-8 encoded byte string.

        Accepts the same optional default as ``dict.get``; a truthy
        default is encoded like any stored value.
        """
        value = dict.get(self, key, *args)
        if not value:
            return b''
        # Strings must be tested for explicitly: on Python 3 they expose
        # ``__iter__`` and would otherwise be joined character by character.
        is_string = isinstance(value, (bytes, self._text_type))
        if not is_string and hasattr(value, '__iter__'):
            # Convert each element on its own so lists mixing text and
            # byte strings never go through implicit ASCII coercion.
            value = u'|'.join([self._to_text(x) for x in value if x])
        else:
            value = self._to_text(value)
        return value.encode('utf8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment