Skip to content

Instantly share code, notes, and snippets.

@laginha
Created February 3, 2012 23:48
Show Gist options
  • Save laginha/1733811 to your computer and use it in GitHub Desktop.
Save laginha/1733811 to your computer and use it in GitHub Desktop.
Unicode csv reader
from unicodedata import normalize, combining
def unicode_(s, encoding='latin-1'):
    """
    Convert a value to unicode text and remove its accents.

    Byte strings are decoded with ``encoding``; the default, Latin-1, maps
    every byte to the code point of the same value. Text is used as is and
    any other object is first rendered with ``'%s'`` formatting. The text
    is then NFKD-normalized and every combining character is dropped, so
    an accented "e" comes out as a plain "e".

    :param s: byte string, text or any other object.
    :param encoding: codec used to decode byte strings.
    :return: unicode text without combining characters.
    """
    if isinstance(s, (bytes, type(u''))):
        text = s
    else:
        # The one-element tuple keeps a tuple argument from being unpacked
        # as several format arguments ('%s' % (1, 2) raises TypeError).
        text = '%s' % (s,)
    if isinstance(text, bytes):
        text = text.decode(encoding)
    nfkd = normalize('NFKD', text)
    return u''.join(c for c in nfkd if not combining(c))
def ucharlist(a):
    """
    Split a string into a list of one-character unicode strings.

    Byte strings are mapped byte by byte to the code point of the same
    value (a Latin-1 decode). This gives the same result as
    ``unichr(ord(c))`` without needing the Python-2-only ``unichr``
    builtin. Items that are already text are returned unchanged.
    """
    if isinstance(a, (bytes, bytearray)):
        a = a.decode('latin-1')
    return [c.decode('latin-1') if isinstance(c, bytes) else c for c in a]


def ulist(a):
    """Return a list with ``unicode_`` applied to every item of ``a``."""
    return [unicode_(i) for i in a]


def utuple(a, b):
    """Return the pair ``(unicode_(a), unicode_(b))``."""
    return (unicode_(a), unicode_(b))


def udict(a):
    """Return a dict with ``unicode_`` applied to each key and value of ``a``."""
    return dict(utuple(key, value) for key, value in a.items())
import csv
class CSV:
    """
    Read delimited text files, passing every value through ``ulist`` or
    ``udict`` before it is yielded.
    """

    @staticmethod
    def _open(filename):
        """Open ``filename`` in the mode the running Python's csv module expects."""
        if str is bytes:
            # Python 2: the csv module reads byte strings from a
            # binary-mode file.
            return open(filename, 'rb')
        # Python 3: the csv module needs a text-mode file opened with
        # newline=''. Latin-1 maps each byte to the code point of the same
        # value, so decoding cannot fail on any input.
        return open(filename, 'r', newline='', encoding='latin-1')

    @staticmethod
    def readlines(filename, delimiter=';'):
        """
        Yield each row of the csv file, one at a time.

        :param filename: path of the file to read.
        :param delimiter: one-character field separator.
        :return: generator of lists, one list of converted fields per row.

        The file is closed when the generator is exhausted or closed.
        """
        with CSV._open(filename) as file_:
            for row in csv.reader(file_, delimiter=delimiter):
                yield ulist(row)

    @staticmethod
    def readjson(filename, delimiter=';'):
        """
        Yield each row of the csv file as a dictionary, except the first row.

        The first row supplies the dictionary keys (``csv.DictReader``).

        :param filename: path of the file to read.
        :param delimiter: one-character field separator.
        :return: generator of dicts, one per data row.

        The file is closed when the generator is exhausted or closed.
        """
        with CSV._open(filename) as file_:
            for row in csv.DictReader(file_, delimiter=delimiter):
                yield udict(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment