Skip to content

Instantly share code, notes, and snippets.

@mgraciano
Last active June 3, 2022 15:14
Show Gist options
  • Save mgraciano/87b284d7d365eda847f045cd3c61f242 to your computer and use it in GitHub Desktop.
Save mgraciano/87b284d7d365eda847f045cd3c61f242 to your computer and use it in GitHub Desktop.
Concise example of how to write Avro records out to a file and read them back in Java, first with the schema embedded in the file and then with a custom reader schema
package mgraciano;
import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
public class HelloAvro {
private static final File FILE = new File("users.avro");
public static void main(String[] args) throws Exception {
// Build a schema
// final Schema schema = new Schema.Parser().parse(HelloAvro.class.getClassLoader()
// .getResourceAsStream("user.json"));
final Schema schema = SchemaBuilder
.record("user").doc("Usuário").namespace("example.avro")
.fields()
.name("name").doc("Nome").type().stringType().noDefault()
.name("favorite_number").doc("Número favorito").type().nullable().intType().noDefault()
.name("favorite_color").doc("Cor favorita").type().nullable().stringType().noDefault()
.endRecord();
final GenericRecord user1 = new GenericData.Record(schema);
user1.put("name", "Alyssa");
user1.put("favorite_number", 256);
// Leave favorite color null
final GenericRecord user2 = new GenericData.Record(schema);
user2.put("name", "Ben");
user2.put("favorite_number", 7);
user2.put("favorite_color", "red");
serialize(schema, user1, user2);
deserialize();
deserialize(SchemaBuilder
.record("user").doc("Usuário").namespace("example.avro")
.fields()
.name("razao_social").doc("Nome").aliases("name").type().stringType().noDefault()
.name("favorite_number").doc("Número favorito").type().nullable().intType().noDefault()
.name("favorite_color").doc("Cor favorita").type().nullable().stringType().noDefault()
.endRecord());
}
private static void serialize(final Schema schema, final GenericRecord... users) throws IOException {
// Serialize users to disk
final DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(writer)) {
fileWriter.create(schema, FILE);
for (GenericRecord user : users) {
fileWriter.append(user);
}
}
}
private static void deserialize() throws IOException {
final DatumReader<GenericRecord> reader = new GenericDatumReader<>();
final DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(FILE, reader);
while (dataFileReader.hasNext()) {
final GenericRecord record = dataFileReader.next();
final Schema schema = record.getSchema();
System.out.println("========== No Schema ==========");
System.out.println("Schema para " + schema.getDoc());
// System.out.println(schema.toString(true));
System.out.println();
schema.getFields().forEach(f -> {
System.out.println(">>> " + f.doc() + ": " + record.get(f.name()));
});
System.out.println(">>> nome: " + record.get("nome"));
System.out.println(">>> razao_social: " + record.get("razao_social"));
}
}
private static void deserialize(final Schema schema) throws IOException {
final DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
final DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(FILE, reader);
while (dataFileReader.hasNext()) {
final GenericRecord record = dataFileReader.next();
System.out.println(">>>>>>>>>> Schema >>>>>>>>>>");
System.out.println("Schema para " + schema.getDoc());
System.out.println();
schema.getFields().forEach(f -> {
System.out.println(">>> " + f.doc() + ": " + record.get(f.name()));
});
System.out.println(">>> nome: " + record.get("nome"));
System.out.println(">>> razao_social: " + record.get("razao_social"));
}
}
}
@luisvenezian
Copy link

luisvenezian commented Jun 3, 2022

É possível preparar o conjunto inteiro em memória e executar apenas uma escrita por arquivo, em vez de um append para cada linha?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment