Created
June 18, 2015 10:43
-
-
Save lindenb/1d570194258055c2dc0b to your computer and use it in GitHub Desktop.
Creating and serializing data using Apache Avro and the GA4GH schema (VCF, NGS, variant, JSON)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds and runs the Avro / GA4GH serialization demo:
#   1. download avro-tools and the GA4GH schemas,
#   2. turn variants.avdl into variants.avpr,
#   3. generate and compile the Java classes together with src/test/TestAvro.java,
#   4. run test.TestAvro (output goes to variant.avro) and dump that file as JSON.

# Versions are declared once so the jar name, the download URLs and the
# schema directory cannot drift apart.
AVRO_VERSION := 1.7.7
SCHEMA_VERSION := 0.5.1
avro.jar := avro-tools-$(AVRO_VERSION).jar
schema.dir := schemas-$(SCHEMA_VERSION)
avro.dir := $(schema.dir)/src/main/resources/avro

# Delete a target whose recipe failed, so a truncated jar or .avpr is not
# mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

.PHONY: all

all: $(avro.jar) $(avro.dir)/variants.avpr src/test/TestAvro.java
	rm -rf generated
	mkdir -p generated
	# generate java classes
	java -jar $< compile protocol $(avro.dir)/ ./generated
	# compile classes
	javac -d generated -cp $(avro.jar) -sourcepath generated:src generated/org/ga4gh/*.java src/test/TestAvro.java
	# archive
	jar cvf generated/ga4gh.jar -C generated org -C generated test
	# run
	java -cp $(avro.jar):generated/ga4gh.jar test.TestAvro > variant.avro
	# decode
	java -jar $(avro.jar) tojson variant.avro

# --fail makes curl return an error on an HTTP failure instead of saving the
# server's error page under the jar's name. The Apache archive is used because
# it keeps old releases available.
$(avro.jar):
	curl --fail -L -o $@ "https://archive.apache.org/dist/avro/avro-$(AVRO_VERSION)/java/$@"

# Compile the Avro IDL file into a JSON protocol file.
$(avro.dir)/variants.avpr: $(avro.jar) $(avro.dir)/variants.avdl
	java -jar $< idl $(filter %.avdl,$^) > $@

# Download and unpack the GA4GH schemas release.
$(avro.dir)/variants.avdl:
	rm -rf $(schema.dir)
	curl --fail -L -o schema.zip "https://github.com/ga4gh/schemas/archive/v$(SCHEMA_VERSION).zip"
	unzip schema.zip
	rm schema.zip
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"id":"rs1","variantSetId":"id1","names":[],"created":{"long":1434624257608},"updated":{"long":1434624257608},"referenceName":"chr1","start":1,"end":1,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs2","variantSetId":"id2","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":2,"end":2,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs3","variantSetId":"id3","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":3,"end":3,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs4","variantSetId":"id4","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":4,"end":4,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs5","variantSetId":"id5","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":5,"end":5,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs6","variantSetId":"id6","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":6,"end":6,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs7","variantSetId":"id7","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":7,"end":7,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs8","variantSetId":"id8","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":8,"end":8,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
{"id":"rs9","variantSetId":"id9","names":[],"created":{"long":1434624257635},"updated":{"long":1434624257635},"referenceName":"chr1","start":9,"end":9,"referenceBases":"A","alternateBases":["C","T"],"info":{},"calls":[]}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package test; | |
import org.apache.avro.*; | |
import org.apache.avro.io.*; | |
import org.apache.avro.file.*; | |
import org.apache.avro.specific.*; | |
import org.ga4gh.*; | |
import java.util.*; | |
public class TestAvro | |
{ | |
private void run() throws Exception | |
{ | |
//Write data of a schema. | |
DatumWriter<GAVariant> variantWriter = new SpecificDatumWriter<GAVariant>( GAVariant.getClassSchema() ); | |
DataFileWriter<GAVariant> dataWriter = new DataFileWriter<GAVariant>(variantWriter); | |
dataWriter.create(GAVariant.getClassSchema(), System.out); | |
for(int i=1;i< 10;++i) | |
{ | |
GAVariant variant = GAVariant.newBuilder() | |
.setReferenceName("chr1") | |
.setStart(i) | |
.setEnd(i) | |
.setId("rs"+i) | |
.setVariantSetId("id"+i) | |
.setReferenceBases("A") | |
.setCreated(System.currentTimeMillis()) | |
.setUpdated(System.currentTimeMillis()) | |
.setAlternateBases(Arrays.asList("C","T")) | |
.build() | |
; | |
dataWriter.append(variant); | |
} | |
dataWriter.close(); | |
} | |
public static void main(String args[]) | |
{ | |
try | |
{ | |
TestAvro app = new TestAvro(); | |
app.run(); | |
} | |
catch(Exception err) | |
{ | |
err.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment