public
Last active

Avro append-to-existing file example with the DataFileWriter.appendTo(…) API.

  • Download Gist
DFWAppendTest.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
package com.cloudera.example;
 
import java.io.IOException;
import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class DFWAppendTest {
public static class Sample {
CharSequence foo;
 
public Sample(CharSequence bar) {
this.foo = bar;
}
}
 
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://localhost");
conf.setInt("dfs.replication", 1);
FileSystem fs = FileSystem.get(conf);
Schema sample = ReflectData.get().getSchema(Sample.class);
ReflectDatumWriter<Sample> rdw = new ReflectDatumWriter<DFWAppendTest.Sample>(
Sample.class);
DataFileWriter<Sample> dfwo = new DataFileWriter<DFWAppendTest.Sample>(rdw);
Path filePath = new Path("/sample.avro");
OutputStream out = fs.create(filePath);
DataFileWriter<Sample> dfw = dfwo.create(sample, out);
dfw.append(new Sample("Eggs"));
dfw.append(new Sample("Spam"));
dfw.close();
out.close();
OutputStream aout = fs.append(filePath);
dfw = dfwo.appendTo(new FsInput(filePath, conf), aout);
dfw.append(new Sample("Monty"));
dfw.append(new Sample("Python"));
dfwo.close();
aout.close();
}
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.