Last active
June 3, 2022 15:14
-
-
Save mgraciano/87b284d7d365eda847f045cd3c61f242 to your computer and use it in GitHub Desktop.
Concise example of how to write Avro records out to a file and parse them back with both the embedded schema and a custom schema in Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package mgraciano; | |
import java.io.File; | |
import java.io.IOException; | |
import org.apache.avro.Schema; | |
import org.apache.avro.SchemaBuilder; | |
import org.apache.avro.file.DataFileReader; | |
import org.apache.avro.file.DataFileWriter; | |
import org.apache.avro.generic.GenericData; | |
import org.apache.avro.generic.GenericDatumReader; | |
import org.apache.avro.generic.GenericDatumWriter; | |
import org.apache.avro.generic.GenericRecord; | |
import org.apache.avro.io.DatumReader; | |
import org.apache.avro.io.DatumWriter; | |
public class HelloAvro { | |
private static final File FILE = new File("users.avro"); | |
public static void main(String[] args) throws Exception { | |
// Build a schema | |
// final Schema schema = new Schema.Parser().parse(HelloAvro.class.getClassLoader() | |
// .getResourceAsStream("user.json")); | |
final Schema schema = SchemaBuilder | |
.record("user").doc("Usuário").namespace("example.avro") | |
.fields() | |
.name("name").doc("Nome").type().stringType().noDefault() | |
.name("favorite_number").doc("Número favorito").type().nullable().intType().noDefault() | |
.name("favorite_color").doc("Cor favorita").type().nullable().stringType().noDefault() | |
.endRecord(); | |
final GenericRecord user1 = new GenericData.Record(schema); | |
user1.put("name", "Alyssa"); | |
user1.put("favorite_number", 256); | |
// Leave favorite color null | |
final GenericRecord user2 = new GenericData.Record(schema); | |
user2.put("name", "Ben"); | |
user2.put("favorite_number", 7); | |
user2.put("favorite_color", "red"); | |
serialize(schema, user1, user2); | |
deserialize(); | |
deserialize(SchemaBuilder | |
.record("user").doc("Usuário").namespace("example.avro") | |
.fields() | |
.name("razao_social").doc("Nome").aliases("name").type().stringType().noDefault() | |
.name("favorite_number").doc("Número favorito").type().nullable().intType().noDefault() | |
.name("favorite_color").doc("Cor favorita").type().nullable().stringType().noDefault() | |
.endRecord()); | |
} | |
private static void serialize(final Schema schema, final GenericRecord... users) throws IOException { | |
// Serialize users to disk | |
final DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema); | |
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(writer)) { | |
fileWriter.create(schema, FILE); | |
for (GenericRecord user : users) { | |
fileWriter.append(user); | |
} | |
} | |
} | |
private static void deserialize() throws IOException { | |
final DatumReader<GenericRecord> reader = new GenericDatumReader<>(); | |
final DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(FILE, reader); | |
while (dataFileReader.hasNext()) { | |
final GenericRecord record = dataFileReader.next(); | |
final Schema schema = record.getSchema(); | |
System.out.println("========== No Schema =========="); | |
System.out.println("Schema para " + schema.getDoc()); | |
// System.out.println(schema.toString(true)); | |
System.out.println(); | |
schema.getFields().forEach(f -> { | |
System.out.println(">>> " + f.doc() + ": " + record.get(f.name())); | |
}); | |
System.out.println(">>> nome: " + record.get("nome")); | |
System.out.println(">>> razao_social: " + record.get("razao_social")); | |
} | |
} | |
private static void deserialize(final Schema schema) throws IOException { | |
final DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); | |
final DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(FILE, reader); | |
while (dataFileReader.hasNext()) { | |
final GenericRecord record = dataFileReader.next(); | |
System.out.println(">>>>>>>>>> Schema >>>>>>>>>>"); | |
System.out.println("Schema para " + schema.getDoc()); | |
System.out.println(); | |
schema.getFields().forEach(f -> { | |
System.out.println(">>> " + f.doc() + ": " + record.get(f.name())); | |
}); | |
System.out.println(">>> nome: " + record.get("nome")); | |
System.out.println(">>> razao_social: " + record.get("razao_social")); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
É possível preparar o conjunto inteiro em memória e executar apenas uma escrita por arquivo, em vez de um append para cada linha?