Skip to content

Instantly share code, notes, and snippets.

@jcoveney
Created April 4, 2013 16:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcoveney/5311795 to your computer and use it in GitHub Desktop.
Save jcoveney/5311795 to your computer and use it in GitHub Desktop.
Avro that works in Python but not in Java
# Avro schema (as JSON text) for the record "imarecord": five primitive
# fields declared in the order first, second, third, fourth, fifth.
a = """{
"fields": [
{
"type": "int",
"name": "first"
},
{
"type": "string",
"name": "second"
},
{
"type": "string",
"name": "third"
},
{
"type": "long",
"name": "fourth"
},
{
"type": "double",
"name": "fifth"
}
],
"type": "record",
"name": "imarecord"
}"""
# JSON datum whose keys are listed in the same order as the schema fields.
b = """{
"first": 11617,
"second": "howdydoody",
"third": "hey",
"fourth": 2398740,
"fifth": 98723987.123
}"""
# Same key/value pairs as b, but with the keys listed in a different order.
c = """{
"second": "howdydoody",
"first": 11617,
"fifth": 98723987.123,
"third": "hey",
"fourth": 2398740
}"""
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import simplejson as json


def write_datum(path, schema_json, datum_json):
    """Write one JSON-encoded datum to a new Avro container file.

    path        -- file to create (overwritten if it exists)
    schema_json -- Avro schema as JSON text
    datum_json  -- the record to append, as JSON text
    """
    # Avro container files are binary, so open with "wb": text mode may
    # translate newline bytes on some platforms and corrupt the output.
    writer = DataFileWriter(open(path, "wb"), DatumWriter(), avro.schema.parse(schema_json))
    try:
        writer.append(json.loads(datum_json))
    finally:
        # Always close so buffered data is flushed even if append() raises.
        writer.close()


write_datum("ab.avro", a, b)
write_datum("ac.avro", a, c)

# The base64 forms of the schema and both datums are printed so they can be
# pasted into the Java version as string constants.
import base64
print(base64.standard_b64encode(a))
print(base64.standard_b64encode(b))
print(base64.standard_b64encode(c))
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.commons.codec.binary.Base64;
/**
 * Decodes a base64-encoded Avro schema and two base64-encoded JSON datums,
 * then writes each datum to its own deflate-compressed Avro container file
 * ({@code ab_java.avro} and {@code ac_java.avro}).
 */
public class Hrm {

  /**
   * Entry point; takes no arguments.
   *
   * @param args unused
   * @throws RuntimeException wrapping any failure while decoding or writing
   */
  public static void main(String[] args) {
    try {
      // Base64 forms of the schema (a) and the two JSON datums (b, c), as
      // printed by the companion Python script.
      String a = "ewogICJmaWVsZHMiOiBbCiAgICB7CiAgICAgICJ0eXBlIjogImludCIsCiAgICAgICJuYW1lIjogImZpcnN0IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAic2Vjb25kIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAidGhpcmQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgIm5hbWUiOiAiZm91cnRoIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiZG91YmxlIiwKICAgICAgIm5hbWUiOiAiZmlmdGgiCiAgICB9CiAgXSwKICAidHlwZSI6ICJyZWNvcmQiLAogICJuYW1lIjogImltYXJlY29yZCIKfQ==";
      String b = "ewogICJmaXJzdCI6IDExNjE3LAogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgInRoaXJkIjogImhleSIsCiAgImZvdXJ0aCI6IDIzOTg3NDAsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzCn0=";
      String c = "ewogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgImZpcnN0IjogMTE2MTcsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzLAogICJ0aGlyZCI6ICJoZXkiLAogICJmb3VydGgiOiAyMzk4NzQwCn0=";

      // Decode with an explicit charset instead of the platform default.
      a = new String(Base64.decodeBase64(a), "UTF-8");
      b = new String(Base64.decodeBase64(b), "UTF-8");
      c = new String(Base64.decodeBase64(c), "UTF-8");

      Schema avroSchema = new Schema.Parser().parse(a);

      writeJsonDatum(avroSchema, b, "ab_java.avro");
      // NOTE(review): c lists its keys in a different order than the schema
      // declares its fields; whether jsonDecoder accepts that is the question
      // this program was written to probe - confirm against the Avro version in use.
      writeJsonDatum(avroSchema, c, "ac_java.avro");
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Parses {@code json} as a datum of {@code schema} and writes it as the only
   * record of a new deflate-compressed (level 6) Avro container file.
   *
   * @param schema schema used both to decode the JSON and to write the file
   * @param json   JSON text of the datum
   * @param path   output file to create
   * @throws IOException if decoding the JSON or writing the file fails
   */
  private static void writeJsonDatum(Schema schema, String json, String path) throws IOException {
    // Decode first so a malformed datum fails before any output file is created.
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>(schema);
    Object datum = reader.read(null, DecoderFactory.get().jsonDecoder(schema, json));

    FileOutputStream out = new FileOutputStream(path);
    try {
      DataFileWriter<Object> writer =
          new DataFileWriter<Object>(new GenericDatumWriter<Object>(schema));
      writer.setCodec(CodecFactory.deflateCodec(6));
      writer.create(schema, out);
      writer.append(datum);
      // Closing the writer flushes the buffered data to the stream.
      writer.close();
    } finally {
      // Release the file handle even if create/append threw.
      out.close();
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment