Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created June 18, 2015 10:43
Show Gist options
  • Save lindenb/1d570194258055c2dc0b to your computer and use it in GitHub Desktop.
Save lindenb/1d570194258055c2dc0b to your computer and use it in GitHub Desktop.
Creating and serializing data using Apache Avro and the GA4GH schema (VCF, NGS, variant, JSON)
# Build pipeline for the Avro / GA4GH example:
#   1. download avro-tools and the GA4GH schemas (v0.5.1),
#   2. convert variants.avdl to an Avro protocol file (variants.avpr),
#   3. generate and compile the Java classes together with src/test/TestAvro.java,
#   4. run test.TestAvro to produce variant.avro and dump it back as JSON.
#
# Recipe lines MUST start with a TAB character; without it GNU make stops
# with "missing separator".

# Remove a target whose recipe failed, so that a half-written file produced
# by a "> $@" redirection is not mistaken for an up-to-date target later.
.DELETE_ON_ERROR:

.PHONY:all
all: avro-tools-1.7.7.jar schemas-0.5.1/src/main/resources/avro/variants.avpr src/test/TestAvro.java
	rm -rf generated
	mkdir -p generated
# generate java classes ($< is the first prerequisite: the avro-tools jar)
	java -jar $< compile protocol schemas-0.5.1/src/main/resources/avro/ ./generated
# compile classes
	javac -d generated -cp avro-tools-1.7.7.jar -sourcepath generated:src generated/org/ga4gh/*.java src/test/TestAvro.java
# archive
	jar cvf generated/ga4gh.jar -C generated org -C generated test
# run: TestAvro writes the Avro data file to stdout
	java -cp avro-tools-1.7.7.jar:generated/ga4gh.jar test.TestAvro > variant.avro
# decode
	java -jar avro-tools-1.7.7.jar tojson variant.avro

# Download the avro-tools jar. -f makes curl fail on an HTTP error instead of
# saving the server's error page under the jar's name.
# NOTE(review): this is a third-party mirror URL; confirm it is still reachable.
avro-tools-1.7.7.jar :
	curl -f -L -o $@ "http://www.eng.lsu.edu/mirrors/apache/avro/avro-1.7.7/java/$@"

# Convert the IDL file (.avdl) to an Avro protocol file (.avpr).
schemas-0.5.1/src/main/resources/avro/variants.avpr: avro-tools-1.7.7.jar schemas-0.5.1/src/main/resources/avro/variants.avdl
	java -jar $< idl $(filter %.avdl,$^) > $@

# Download and unpack the GA4GH schemas, release v0.5.1.
schemas-0.5.1/src/main/resources/avro/variants.avdl :
	rm -rf schemas-0.5.1
	curl -f -L -o schema.zip "https://github.com/ga4gh/schemas/archive/v0.5.1.zip"
	unzip schema.zip
	rm schema.zip
{"id":"rs1","variantSetId":"id1","names":[],"created":{"long":1434624257608},"updated":{"long":1434624257608},"referenceName":"chr1","start":1,"end":1,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs2","variantSetId":"id2","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":2,"end":2,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs3","variantSetId":"id3","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":3,"end":3,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs4","variantSetId":"id4","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":4,"end":4,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs5","variantSetId":"id5","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":5,"end":5,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs6","variantSetId":"id6","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":6,"end":6,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs7","variantSetId":"id7","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":7,"end":7,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs8","variantSetId":"id8","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":8,"end":8,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs9","variantSetId":"id9","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":9,"end":9,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
package test;
import org.apache.avro.*;
import org.apache.avro.io.*;
import org.apache.avro.file.*;
import org.apache.avro.specific.*;
import org.ga4gh.*;
import java.util.*;
/**
 * Small demo that builds a few {@code GAVariant} records and serializes them
 * to standard output as an Avro data file.
 *
 * <p>Standard output carries the binary data, so diagnostics go to standard
 * error only.
 */
public class TestAvro
{
    /**
     * Writes nine variants ({@code rs1} .. {@code rs9}) on {@code chr1} to
     * {@code System.out}.
     *
     * @throws Exception if creating the data file or appending a record fails
     */
    private void run() throws Exception
    {
        final Schema schema = GAVariant.getClassSchema();
        DatumWriter<GAVariant> variantWriter = new SpecificDatumWriter<GAVariant>(schema);
        DataFileWriter<GAVariant> dataWriter = new DataFileWriter<GAVariant>(variantWriter);
        try
        {
            dataWriter.create(schema, System.out);
            for (int i = 1; i < 10; ++i)
            {
                // Read the clock once so 'created' and 'updated' always agree
                // for a freshly built record.
                final long now = System.currentTimeMillis();
                GAVariant variant = GAVariant.newBuilder()
                    .setReferenceName("chr1")
                    .setStart(i)
                    .setEnd(i)
                    .setId("rs" + i)
                    .setVariantSetId("id" + i)
                    .setReferenceBases("A")
                    .setCreated(now)
                    .setUpdated(now)
                    .setAlternateBases(Arrays.asList("C", "T"))
                    .build();
                dataWriter.append(variant);
            }
        }
        finally
        {
            // Always close the writer, even when create/append throws,
            // so the underlying stream is not leaked.
            dataWriter.close();
        }
    }

    /**
     * Program entry point.
     *
     * <p>Exits with status 1 on failure so that a calling build step (which
     * redirects stdout to a file) does not treat a truncated file as a success.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        try
        {
            TestAvro app = new TestAvro();
            app.run();
        }
        catch (Exception err)
        {
            err.printStackTrace();
            System.exit(1);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment