Created
November 1, 2012 21:49
-
-
Save bderickson/3996847 to your computer and use it in GitHub Desktop.
Avro Reducer that fails with "No field named chromosome in: null"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static class AvroBamCoverageReducer | |
extends AvroReducer<String, Integer, Pair<GenericRecord, Integer>> { | |
public void reduce(String key, Iterator<Integer> values, | |
AvroCollector<Pair<GenericRecord, Integer>> collector, Reporter reporter) | |
throws IOException { | |
int sum = 0; | |
while(values.hasNext()) { | |
Integer value = values.next(); | |
sum += value.intValue(); | |
} | |
GenericRecord record = new GenericData.Record(coverageSchema); | |
String[] parts = key.toString().split(":"); | |
record.put("chromosome", parts[0]); | |
record.put("position", parts[1]); | |
record.put("depth", sum); | |
// I couldn't figure out how to have null as the second part of the pair | |
Pair pair = new Pair(record, new Integer(parts[0])); | |
collector.collect(pair); | |
} | |
} | |
public static void main(String[] args) throws Exception { | |
JobConf conf = new JobConf(); | |
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); | |
if (otherArgs.length != 2) { | |
System.err.println("Usage: BAMCoverage <in> <out>"); | |
System.exit(2); | |
} | |
Job job = new Job(conf, "BAM Coverage"); | |
job.setJarByClass(BAMCoverage.class); | |
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); | |
Path outputPath = new Path(otherArgs[1]); | |
outputPath.getFileSystem(conf).delete(outputPath); | |
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); | |
job.setMapperClass(BamCoverageMapper.class); | |
job.setInputFormatClass(AnySAMInputFormat.class); | |
Schema stringSchema = Schema.create(Schema.Type.STRING); | |
Schema intSchema = Schema.create(Schema.Type.INT); | |
Schema mapPairSchema = Pair.getPairSchema(stringSchema, intSchema); | |
AvroJob.setMapOutputSchema(conf, mapPairSchema); | |
AvroJob.setReducerClass(conf, AvroBamCoverageReducer.class); | |
Schema reducePairSchema = Pair.getPairSchema(coverageSchema, intSchema); | |
AvroJob.setOutputSchema(conf, reducePairSchema); | |
System.exit(job.waitForCompletion(true) ? 0 : 1); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "type" : "record",
  "name" : "PerPositionCoverage",
  "namespace" : "edu.wustl.genome.hadoop.coverage",
  "fields" : [ {
    "name" : "chromosome",
    "type" : "string"
  }, {
    "name" : "position",
    "type" : "int"
  }, {
    "name" : "depth",
    "type" : "int"
  } ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2012-11-01 16:27:26,906 WARN org.apache.hadoop.mapred.Child: Error running child
org.apache.avro.file.DataFileWriter$AppendWriteException: org.apache.avro.AvroRuntimeException: No field named chromosome in: null
    at org.apache.avro.file.DataFileWriter.append(DataFileWriter.java:261)
    at org.apache.avro.mapred.AvroOutputFormat$1.write(AvroOutputFormat.java:116)
    at org.apache.avro.mapred.AvroOutputFormat$1.write(AvroOutputFormat.java:113)
    at org.apache.hadoop.mapred.ReduceTask$3.collect(ReduceTask.java:446)
    at org.apache.avro.mapred.HadoopReducer$ReduceCollector.collect(HadoopReducer.java:50)
    at org.apache.avro.mapred.AvroReducer.reduce(AvroReducer.java:50)
    at org.apache.avro.mapred.HadoopReducerBase.reduce(HadoopReducerBase.java:61)
    at org.apache.avro.mapred.HadoopReducerBase.reduce(HadoopReducerBase.java:30)
    at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:469)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:417)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:270)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
    at org.apache.hadoop.mapred.Child.main(Child.java:264)
Caused by: org.apache.avro.AvroRuntimeException: No field named chromosome in: null
    at org.apache.avro.reflect.ReflectData.findField(ReflectData.java:194)
    at org.apache.avro.reflect.ReflectData.getField(ReflectData.java:179)
    at org.apache.avro.reflect.ReflectData.getField(ReflectData.java:96)
    at org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:102)
    at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:65)
    at org.apache.avro.reflect.ReflectDatumWriter.write(ReflectDatumWriter.java:102)
    at org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:104)
    at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:65)
    at org.apache.avro.reflect.ReflectDatumWriter.write(ReflectDatumWriter.java:102)
    at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:57)
    at org.apache.avro.file.DataFileWriter.append(DataFileWriter.java:255)
    ... 14 more
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment