Skip to content

Instantly share code, notes, and snippets.

@sathyaprakashg
Created November 7, 2020 21:26
Show Gist options
  • Save sathyaprakashg/f423291be7be6f9d96b9cb850fc72edf to your computer and use it in GitHub Desktop.
Save sathyaprakashg/f423291be7be6f9d96b9cb850fc72edf to your computer and use it in GitHub Desktop.
Hudi schema evolution testing
import java.io.ByteArrayOutputStream
import java.math.BigDecimal
import org.apache.avro.{LogicalTypes, Schema, Conversions}
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
object SchemaEvolutionTest {
def main(args: Array[String]): Unit = {
val writerSchemaString ="""{
| "type" : "record",
| "name" : "triprec",
| "fields" : [
| {
| "name" : "driver",
| "type" : "string"
| },
| {
| "name" : "height",
| "type" : {
| "type" : "fixed",
| "name" : "fixed",
| "namespace": "hoodie.source.hoodie_source.height",
| "size" : 5,
| "logicalType" : "decimal",
| "precision" : 10,
| "scale": 6
| }
| }
| ]
|}
| """.stripMargin
val readerSchemaString ="""{
| "type" : "record",
| "name" : "triprec",
| "fields" : [
| {
| "name" : "driver",
| "type" : "string"
| },
| {
| "name" : "height",
| "type" : {
| "type" : "fixed",
| "name" : "fixed",
| "size" : 5,
| "logicalType" : "decimal",
| "precision" : 10,
| "scale": 6
| }
| },
| {
| "name" : "rider",
| "type" : ["null","string"],
| "default" : null
| }
| ]
|}
| """.stripMargin
val writerSchema = new Schema.Parser().parse(writerSchemaString)
val readerSchema = new Schema.Parser().parse(readerSchemaString)
val record = new GenericData.Record(writerSchema)
record.put("driver", "John")
val bigDecimal = new BigDecimal("5.900000")
val decimalSchema = writerSchema.getField("height").schema
val decimalConversions = new Conversions.DecimalConversion
val genericFixed = decimalConversions.toFixed(bigDecimal, decimalSchema, LogicalTypes.decimal(10, 6))
record.put("height", genericFixed)
val gWriter = new GenericDatumWriter[GenericRecord](readerSchema)
val gReader = new GenericDatumReader[GenericRecord](readerSchema)
val inStream = new ByteArrayOutputStream
val encoder = EncoderFactory.get.binaryEncoder(inStream, null)
gWriter.write(record, encoder)
encoder.flush()
val decoderTemp = DecoderFactory.get.binaryDecoder(inStream.toByteArray, null)
val transformedRec = gReader.read(null, decoderTemp)
}
}
@sathyaprakashg
Copy link
Author

Error

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 2
	at org.apache.avro.generic.GenericData$Record.get(GenericData.java:212)
	at org.apache.avro.generic.GenericData.getField(GenericData.java:697)
	at org.apache.avro.generic.GenericData.getField(GenericData.java:712)
	at org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:164)
	at org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:156)
	at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:118)
	at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
	at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:62)
	at SchemaEvolutionTest$.main(SchemaEvolutionTest.scala:81)
	at SchemaEvolutionTest.main(SchemaEvolutionTest.scala)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment