Avro that works in Python but not in Java

avro_issue_1
a = """{
  "fields": [
    {
      "type": "int",
      "name": "first"
    },
    {
      "type": "string",
      "name": "second"
    },
    {
      "type": "string",
      "name": "third"
    },
    {
      "type": "long",
      "name": "fourth"
    },
    {
      "type": "double",
      "name": "fifth"
    }
  ],
  "type": "record",
  "name": "imarecord"
}"""
b = """{
  "first": 11617,
  "second": "howdydoody",
  "third": "hey",
  "fourth": 2398740,
  "fifth": 98723987.123
}"""
c = """{
  "second": "howdydoody",
  "first": 11617,
  "fifth": 98723987.123,
  "third": "hey",
  "fourth": 2398740
}"""
 
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import simplejson as json
 
writer = DataFileWriter(open("ab.avro", "wb"), DatumWriter(), avro.schema.parse(a))
writer.append(json.loads(b))
writer.close()
 
writer = DataFileWriter(open("ac.avro", "wb"), DatumWriter(), avro.schema.parse(a))
writer.append(json.loads(c))
writer.close()
 
# These base64-encoded strings are pasted into the Java program below
 
import base64
 
print base64.standard_b64encode(a)
print base64.standard_b64encode(b)
print base64.standard_b64encode(c)
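
# Not part of the original gist: a quick read-back check (reusing the
# DataFileReader/DatumReader imports above) to confirm that the two
# Python-written container files decode cleanly.
for name in ("ab.avro", "ac.avro"):
    reader = DataFileReader(open(name, "rb"), DatumReader())
    for record in reader:
        print name, record
    reader.close()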
avro_issue_2
import java.io.FileOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.commons.codec.binary.Base64;
 
public class Hrm {
    public static void main(String[] args) {
        try {
            // These base64 strings come from the prints in the Python script above
            String a = "ewogICJmaWVsZHMiOiBbCiAgICB7CiAgICAgICJ0eXBlIjogImludCIsCiAgICAgICJuYW1lIjogImZpcnN0IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAic2Vjb25kIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAidGhpcmQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgIm5hbWUiOiAiZm91cnRoIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiZG91YmxlIiwKICAgICAgIm5hbWUiOiAiZmlmdGgiCiAgICB9CiAgXSwKICAidHlwZSI6ICJyZWNvcmQiLAogICJuYW1lIjogImltYXJlY29yZCIKfQ==";
            a = new String(Base64.decodeBase64(a));
            String b = "ewogICJmaXJzdCI6IDExNjE3LAogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgInRoaXJkIjogImhleSIsCiAgImZvdXJ0aCI6IDIzOTg3NDAsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzCn0=";
            b = new String(Base64.decodeBase64(b));
            String c = "ewogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgImZpcnN0IjogMTE2MTcsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzLAogICJ0aGlyZCI6ICJoZXkiLAogICJmb3VydGgiOiAyMzk4NzQwCn0=";
            c = new String(Base64.decodeBase64(c));

            Schema avroSchema = new Schema.Parser().parse(a);
            GenericDatumReader<Object> reader = new GenericDatumReader<Object>(avroSchema);

            // Decode the first JSON record and write it to ab_java.avro
            DataFileWriter<Object> writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>(avroSchema));
            writer.setCodec(CodecFactory.deflateCodec(6));
            writer.create(avroSchema, new FileOutputStream("ab_java.avro"));
            Object datum = reader.read(null, DecoderFactory.get().jsonDecoder(avroSchema, b));
            writer.append(datum);
            writer.close();

            // Decode the second JSON record (same fields, different key order) and write it to ac_java.avro
            writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>(avroSchema));
            writer.setCodec(CodecFactory.deflateCodec(6));
            writer.create(avroSchema, new FileOutputStream("ac_java.avro"));
            datum = reader.read(null, DecoderFactory.get().jsonDecoder(avroSchema, c));
            writer.append(datum);
            writer.close();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
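
# Also not in the original gist: once the Java program has run, the
# Java-written containers can be read back with the same Python reader for
# a side-by-side comparison with ab.avro/ac.avro. File names ab_java.avro
# and ac_java.avro are assumed from the Java code above.
from avro.datafile import DataFileReader
from avro.io import DatumReader

for name in ("ab_java.avro", "ac_java.avro"):
    reader = DataFileReader(open(name, "rb"), DatumReader())
    for record in reader:
        print name, record
    reader.close()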
