Skip to content

Instantly share code, notes, and snippets.

@vkuznet
Created January 14, 2016 16:11
Show Gist options
  • Save vkuznet/27506907e9ff8a36790d to your computer and use it in GitHub Desktop.
Save vkuznet/27506907e9ff8a36790d to your computer and use it in GitHub Desktop.
Example of bulk writes to avro file
import json
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
# Round-trip example: load one JSON record, write it ten times to an Avro
# container file (changing the "foo" field each time), then read the
# records back and print them.
#
# Inputs (relative to the current working directory):
#   ./<name>.avsc  - Avro schema, JSON text
#   <name>.json    - template record; assumed to be a JSON object whose
#                    schema has a "foo" field that accepts an int
#                    (TODO confirm against the schema)
# Output:
#   <name>.avro    - Avro container file, overwritten on each run

# transform json into avro
name = 'test'
sfile = './%s.avsc' % name
avro_file = '%s.avro' % name

# Read the schema through a context manager so the handle is closed promptly.
with open(sfile) as schema_fh:
    schema = avro.schema.parse(schema_fh.read())

# Load the template record that is written repeatedly below.
with open('%s.json' % name, 'r') as json_fh:
    data = json.load(json_fh)
print("init data", data)

# setup writer
# Avro container files are binary, so the file must be opened in binary mode.
# "wb" is used instead of append mode: passing a schema makes DataFileWriter
# emit a fresh file header, which would corrupt an existing file if the new
# header were appended to it.
writer = DataFileWriter(open(avro_file, "wb"), DatumWriter(), schema)
try:
    for idx in range(10):
        data["foo"] = idx
        print("write data", data)
        writer.append(data)
    # Flush once after the whole batch has been appended rather than after
    # every record.
    writer.flush()
finally:
    # Always release the writer, even if an append fails.
    writer.close()

# read avro file
reader = DataFileReader(open(avro_file, "rb"), DatumReader())
try:
    for rec in reader:
        print(rec)
finally:
    reader.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment