Skip to content

Instantly share code, notes, and snippets.

@arahuja
Created February 9, 2014 20:20
Show Gist options
  • Save arahuja/8905391 to your computer and use it in GitHub Desktop.
Save arahuja/8905391 to your computer and use it in GitHub Desktop.
package edu.berkeley.cs.amplab.adam.cli
import java.io.File;
import edu.berkeley.cs.amplab.adam.models.ADAMVariantContext
import edu.berkeley.cs.amplab.adam.rdd.AdamContext._
import edu.berkeley.cs.amplab.adam.rdd.variation.ADAMVariationContext._
import edu.berkeley.cs.amplab.adam.util.ParquetLogger
import java.util.logging.Level
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.rdd.RDD
import org.kohsuke.args4j.Argument
import edu.berkeley.cs.amplab.adam.converters.VariantContextConverter
import fi.tkk.ics.hadoop.bam.{VariantContextWritable, VCFInputFormat}
import org.apache.hadoop.io.LongWritable
import parquet.hadoop.util.ContextUtil
import org.broadinstitute.variant.variantcontext.VariantContext
import edu.berkeley.cs.amplab.adam.avro._
/**
 * Companion for the [[TesterJob]] command. It exposes the command's name and
 * description and builds a command instance from raw command-line arguments.
 */
object TesterJob extends AdamCommandCompanion {
  val commandName: String = "TesterJob"
  val commandDescription: String = "Play with corresponding ADAM format"

  /**
   * Builds a [[TesterJob]] from raw command-line arguments.
   *
   * @param cmdLine the raw arguments, handed to `Args4j[TesterJobArgs]` to
   *                produce the argument holder
   * @return a new command wrapping the parsed arguments
   */
  def apply(cmdLine: Array[String]): TesterJob = {
    val parsedArgs = Args4j[TesterJobArgs](cmdLine)
    new TesterJob(parsedArgs)
  }
}
/**
 * Argument holder for [[TesterJob]]. Declares two required positional
 * arguments through `@Argument` annotations.
 *
 * Both fields are mutable and start out as `null`; presumably the argument
 * parser assigns them after construction (not visible here, so verify against
 * `Args4j`).
 */
class TesterJobArgs extends Args4jBase with ParquetArgs with SparkArgs {
  // Positional argument 0: the VCF input.
  @Argument(index = 0, required = true, metaVar = "VCF", usage = "The VCF file to convert")
  var vcfFile: String = null

  // Positional argument 1: the ADAM output location.
  @Argument(index = 1, required = true, metaVar = "ADAM", usage = "Location to write ADAM Variant data")
  var outputPath: String = null
}
/**
 * Command that loads genotype records from the path given as the first
 * positional argument and prints two counts to standard output.
 *
 * @param args the parsed command-line arguments for this command
 */
class TesterJob(val args: TesterJobArgs) extends AdamSparkCommand[TesterJobArgs] with Logging {
  val companion = TesterJob

  /**
   * Loads the records at `args.vcfFile` as an `RDD[ADAMGenotype]`, then prints
   * the total number of records followed by the number of records whose
   * `varIsFiltered` field is non-null and equal to `true`.
   *
   * @param sc  the Spark context used to load the data
   * @param job the Hadoop job handed in by the caller; not used by this body
   */
  def run(sc: SparkContext, job: Job): Unit = {
    val inputPath = args.vcfFile
    log.info("Reading ADAMVCF file from %s".format(inputPath))

    val genotypes: RDD[ADAMGenotype] = sc.adamLoad(inputPath)

    // Predicate kept as a local function value; a null `varIsFiltered` counts
    // as "not filtered".
    val isFiltered = (genotype: ADAMGenotype) =>
      genotype.varIsFiltered != null && genotype.varIsFiltered == true

    // Each count is printed as soon as it is computed, in this order.
    println(genotypes.count())
    println(genotypes.filter(isFiltered).count())
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment