Created
April 14, 2012 02:46
-
-
Save rjurney/2381748 to your computer and use it in GitHub Desktop.
Reproducing AVRO-565
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from avro import schema, datafile, io

# Minimal reproduction of AVRO-565. The traceback recorded below was captured
# on Python 2.6, where a plain "..." literal is a byte string.
#
# Simplified to include only offending characters from Brazil, with no charset in the email header.
# NOTE: 'body' holds non-ASCII bytes written as \x escapes in a plain string
# literal (not a unicode object). Per the traceback, the failure happens when
# avro's write_utf8() calls datum.encode("utf-8") on this value.
email_hash = {'body': "Verit\xc3\xa1\r\nEstat\xc3\xadstica\r\n"}

# Avro container file the script attempts to write, relative to the current directory.
out_filename = '565.avro'

# Record schema with a single field, 'body', declared as a union of string and null.
schema_string = """
{
"namespace": "agile.data.avro",
"name": "Email",
"type": "record",
"fields": [
{"name":"body", "type": ["string", "null"]}
]
}"""

email_schema = schema.parse(schema_string)
rec_writer = io.DatumWriter(email_schema)
df_writer = datafile.DataFileWriter(
    open(out_filename, 'wb'),
    rec_writer,
    email_schema
)

# This call is the reproduction: it raises the UnicodeDecodeError shown in the
# traceback below, so the script stops here and the writer is never closed.
df_writer.append(email_hash)  # Boom!
# Traceback (most recent call last):
# File "<stdin>", line 1, in <module>
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/datafile.py", line 188, in append
# self.datum_writer.write(datum, self.buffer_encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 771, in write
# self.write_data(self.writers_schema, datum, encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 802, in write_data
# self.write_record(writers_schema, datum, encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 890, in write_record
# self.write_data(field.type, datum.get(field.name), encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 800, in write_data
# self.write_union(writers_schema, datum, encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 880, in write_union
# self.write_data(writers_schema.schemas[index_of_schema], datum, encoder)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 780, in write_data
# encoder.write_utf8(datum)
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-_AVRO_VERSION_-py2.6.egg/avro/io.py", line 356, in write_utf8
# datum = datum.encode("utf-8")
# UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 5: ordinal not in range(128)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment