Created
February 9, 2014 20:20
-
-
Save arahuja/8905391 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package edu.berkeley.cs.amplab.adam.cli | |
import java.io.File; | |
import edu.berkeley.cs.amplab.adam.models.ADAMVariantContext | |
import edu.berkeley.cs.amplab.adam.rdd.AdamContext._ | |
import edu.berkeley.cs.amplab.adam.rdd.variation.ADAMVariationContext._ | |
import edu.berkeley.cs.amplab.adam.util.ParquetLogger | |
import java.util.logging.Level | |
import org.apache.hadoop.mapreduce.Job | |
import org.apache.spark.{Logging, SparkContext} | |
import org.apache.spark.rdd.RDD | |
import org.kohsuke.args4j.Argument | |
import edu.berkeley.cs.amplab.adam.converters.VariantContextConverter | |
import fi.tkk.ics.hadoop.bam.{VariantContextWritable, VCFInputFormat} | |
import org.apache.hadoop.io.LongWritable | |
import parquet.hadoop.util.ContextUtil | |
import org.broadinstitute.variant.variantcontext.VariantContext | |
import edu.berkeley.cs.amplab.adam.avro._ | |
/**
 * Companion of the [[TesterJob]] command.
 *
 * Exposes the command's name and description and builds a command instance
 * from raw command-line arguments.
 */
object TesterJob extends AdamCommandCompanion {
  val commandName = "TesterJob"
  val commandDescription = "Play with corresponding ADAM format"

  /**
   * Parses `cmdLine` into a [[TesterJobArgs]] through `Args4j` and wraps the
   * result in a new [[TesterJob]].
   */
  def apply(cmdLine: Array[String]) = {
    val parsedArgs = Args4j[TesterJobArgs](cmdLine)
    new TesterJob(parsedArgs)
  }
}
/**
 * Command-line arguments of [[TesterJob]]: two required positional arguments,
 * in addition to whatever `Args4jBase`, `ParquetArgs` and `SparkArgs` contribute.
 *
 * The fields are mutable `var`s initialised to `null`; they are declared with
 * args4j's `@Argument` annotation.
 */
class TesterJobArgs extends Args4jBase with ParquetArgs with SparkArgs {
  // Positional argument 0.
  // NOTE(review): the usage text calls this a VCF file, yet TesterJob.run passes it to
  // `adamLoad` as ADAMGenotype records -- confirm which input format is actually expected.
  @Argument(index = 0, required = true, metaVar = "VCF", usage = "The VCF file to convert")
  var vcfFile: String = null

  // Positional argument 1. Required on the command line, but TesterJob.run never reads it.
  @Argument(index = 1, required = true, metaVar = "ADAM", usage = "Location to write ADAM Variant data")
  var outputPath: String = null
}
/**
 * Exploratory command that loads genotype records and prints two counts to
 * standard output.
 *
 * @param args parsed command-line arguments; `run` only reads `vcfFile`
 */
class TesterJob(val args: TesterJobArgs) extends AdamSparkCommand[TesterJobArgs] with Logging {
  val companion = TesterJob

  /**
   * Loads the records found at `args.vcfFile` and prints, in order:
   *  1. the total number of records, and
   *  2. the number of records whose `varIsFiltered` field is non-null and true.
   *
   * @param sc  Spark context used to load the input
   * @param job Hadoop job passed in by the caller; not used by this command
   */
  def run(sc: SparkContext, job: Job): Unit = {
    log.info("Reading ADAMVCF file from %s".format(args.vcfFile))

    val variants: RDD[ADAMGenotype] = sc.adamLoad(args.vcfFile)

    // NOTE: `variants` is not persisted, so each count() below is a separate
    // Spark action that re-reads the input.
    println(variants.count())

    // `varIsFiltered` may be null, so read it once and guard before comparing.
    val filteredCount = variants.filter { genotype =>
      val filteredFlag = genotype.varIsFiltered
      filteredFlag != null && filteredFlag == true
    }.count()
    println(filteredCount)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment