Skip to content

Instantly share code, notes, and snippets.

@marceloleiva
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marceloleiva/4f4bcba91b4bee046fc6 to your computer and use it in GitHub Desktop.
Save marceloleiva/4f4bcba91b4bee046fc6 to your computer and use it in GitHub Desktop.
## http://pastebin.ubuntu.com/11974308/
## encoding problems: text comes out as Ã
import codecs
import csv
def gen(path="Ventas.csv", encoding="utf16"):
    """Yield the lines of a UTF-16 text file re-encoded as byte strings.

    The first line (the CSV header) is encoded as UTF-8.  Every later line
    is encoded with ``raw_unicode_escape``, which writes code points below
    256 as the single byte of the same value and escapes everything else.
    NOTE(review): presumably this undoes UTF-8 text that was mis-decoded as
    Latin-1 before being saved as UTF-16 -- confirm against the real data.

    Args:
        path: File to read; defaults to the original hard-coded name.
        encoding: Codec used to decode the file.

    Yields:
        One byte string per input line, line terminator included.
    """
    # The context manager closes the handle when the generator is exhausted
    # or closed; the original never closed it.
    with codecs.open(path, "rb", encoding) as f:
        header = next(f, None)
        if header is None:
            # Empty file: finish cleanly instead of letting StopIteration
            # escape from next() inside a generator (RuntimeError, PEP 479).
            return
        yield header.encode("utf-8")
        for line in f:
            yield line.encode("raw_unicode_escape")
# Feed the re-encoded lines to the csv module; the first line yielded by
# gen() supplies the column names used as keys of every row dict.
r = csv.DictReader(gen())
for line in r:
    print(line)
>>> with open("Ventas.csv", "rb") as fh:
... data = fh.read().decode("utf16")
...
>>> for i, line in enumerate(data.split("\n")):
... print line.encode("latin1").decode("utf8") if i else line
import codecs
import csv
def a2u(s):
    """Decode the byte string *s* to unicode.

    UTF-8 is tried first; if *s* is not valid UTF-8 it is decoded as
    Latin-1 instead, which accepts any byte sequence.

    Args:
        s: Byte string to decode.

    Returns:
        The decoded unicode text.
    """
    try:
        return s.decode("utf-8")
    except UnicodeDecodeError:
        return s.decode("latin1")
def gen(path="Ventas.csv", encoding="utf-16-le"):
    """Yield the lines of a UTF-16-LE text file re-encoded as byte strings.

    The first line (the CSV header) is encoded as UTF-8.  Every later line
    is encoded with ``raw_unicode_escape``, which writes code points below
    256 as the single byte of the same value and escapes everything else.
    NOTE(review): ``utf-16-le`` does not consume a byte-order mark, so a
    file that starts with one keeps it in the header line -- confirm the
    input has no BOM.

    Args:
        path: File to read; defaults to the original hard-coded name.
        encoding: Codec used to decode the file.

    Yields:
        One byte string per input line, line terminator included.
    """
    # The context manager closes the handle when the generator is exhausted
    # or closed; the original never closed it.
    with codecs.open(path, "rb", encoding) as f:
        header = next(f, None)
        if header is None:
            # Empty file: finish cleanly instead of letting StopIteration
            # escape from next() inside a generator (RuntimeError, PEP 479).
            return
        yield header.encode("utf-8")
        for line in f:
            yield line.encode("raw_unicode_escape")
# Parse every line produced by gen() and decode each cell to unicode.
data = [[a2u(cell) for cell in record] for record in csv.reader(gen())]
for row in data:
    # Each cell is unicode; to print it, encode it to UTF-8 (or whatever
    # encoding the console uses).  When writing straight to a console Python
    # can work the encoding out, but when the output goes to a pipe it cannot.
    # One space-separated line per row, written with a single print call.
    print(" ".join(col.encode("utf-8") for col in row))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment