Created
April 4, 2013 16:18
-
-
Save jcoveney/5311795 to your computer and use it in GitHub Desktop.
Avro that works in Python but not in Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python (2.x) half of the reproduction: writes the same record to two Avro
# container files, once with the JSON keys in schema order (b) and once with
# the keys shuffled (c), then prints the Base64 of the schema and of both JSON
# documents so they can be pasted into the Java program.
import base64

import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import simplejson as json

# Avro record schema: five primitive fields, declared first..fifth.
a = """{
  "fields": [
    {
      "type": "int",
      "name": "first"
    },
    {
      "type": "string",
      "name": "second"
    },
    {
      "type": "string",
      "name": "third"
    },
    {
      "type": "long",
      "name": "fourth"
    },
    {
      "type": "double",
      "name": "fifth"
    }
  ],
  "type": "record",
  "name": "imarecord"
}"""

# Record whose keys appear in the same order as the schema fields.
b = """{
  "first": 11617,
  "second": "howdydoody",
  "third": "hey",
  "fourth": 2398740,
  "fifth": 98723987.123
}"""

# Same values as b, but with the keys in a different order.
c = """{
  "second": "howdydoody",
  "first": 11617,
  "fifth": 98723987.123,
  "third": "hey",
  "fourth": 2398740
}"""


def write_avro_file(path, schema, record_json):
    """Write the single record described by ``record_json`` to ``path``.

    path        -- name of the Avro container file to create.
    schema      -- parsed Avro schema used for the file.
    record_json -- JSON text of one record; it is parsed with ``json.loads``
                   and the resulting dict is appended to the file.
    """
    # Avro container files are binary, so open in "wb" rather than text mode.
    writer = DataFileWriter(open(path, "wb"), DatumWriter(), schema)
    try:
        writer.append(json.loads(record_json))
    finally:
        # Always close so the appended record is flushed to disk.
        writer.close()


schema = avro.schema.parse(a)
write_avro_file("ab.avro", schema, b)
write_avro_file("ac.avro", schema, c)

# These Base64 strings are pasted into the Java program as its a/b/c inputs.
# A parenthesised single argument prints the same under the Python 2 print
# statement that the original script used.
print(base64.standard_b64encode(a))
print(base64.standard_b64encode(b))
print(base64.standard_b64encode(c))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import org.apache.hadoop.fs.FileSystem; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapred.FileOutputFormat; | |
import org.apache.hadoop.mapred.JobConf; | |
import org.apache.hadoop.mapred.RecordWriter; | |
import org.apache.hadoop.mapred.Reporter; | |
import org.apache.hadoop.mapred.lib.MultipleOutputFormat; | |
import org.apache.hadoop.util.Progressable; | |
import org.apache.avro.Schema; | |
import org.apache.avro.file.CodecFactory; | |
import org.apache.avro.file.DataFileWriter; | |
import org.apache.avro.generic.GenericDatumReader; | |
import org.apache.avro.generic.GenericDatumWriter; | |
import org.apache.avro.io.DecoderFactory; | |
import org.apache.commons.codec.binary.Base64; | |
public class Hrm { | |
public static void main(String[] args) { | |
try { | |
//These come from the prints above | |
String a = "ewogICJmaWVsZHMiOiBbCiAgICB7CiAgICAgICJ0eXBlIjogImludCIsCiAgICAgICJuYW1lIjogImZpcnN0IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAic2Vjb25kIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAic3RyaW5nIiwKICAgICAgIm5hbWUiOiAidGhpcmQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgIm5hbWUiOiAiZm91cnRoIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiZG91YmxlIiwKICAgICAgIm5hbWUiOiAiZmlmdGgiCiAgICB9CiAgXSwKICAidHlwZSI6ICJyZWNvcmQiLAogICJuYW1lIjogImltYXJlY29yZCIKfQ=="; | |
a = new String(Base64.decodeBase64(a)); | |
String b = "ewogICJmaXJzdCI6IDExNjE3LAogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgInRoaXJkIjogImhleSIsCiAgImZvdXJ0aCI6IDIzOTg3NDAsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzCn0="; | |
b = new String(Base64.decodeBase64(b)); | |
String c = "ewogICJzZWNvbmQiOiAiaG93ZHlkb29keSIsCiAgImZpcnN0IjogMTE2MTcsCiAgImZpZnRoIjogOTg3MjM5ODcuMTIzLAogICJ0aGlyZCI6ICJoZXkiLAogICJmb3VydGgiOiAyMzk4NzQwCn0="; | |
c = new String(Base64.decodeBase64(c)); | |
Schema avroSchema = new Schema.Parser().parse(a); | |
DataFileWriter<Object> writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>(avroSchema)); | |
writer.setCodec(CodecFactory.deflateCodec(6)); | |
writer.create(avroSchema, new FileOutputStream("ab_java.avro")); | |
GenericDatumReader<Object> reader = new GenericDatumReader<Object>(avroSchema); | |
Object datum = reader.read(null, DecoderFactory.get().jsonDecoder(avroSchema, value.toString())); | |
writer.append(datum); | |
writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>(avroSchema)); | |
writer.setCodec(CodecFactory.deflateCodec(6)); | |
writer.create(avroSchema, new FileOutputStream("ac_java.avro")); | |
} catch (Exception e) { | |
throw new RuntimeException(e); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment