Last active
September 17, 2015 22:53
-
-
Save indraniel/16fb0536dadccd05badb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"math" | |
"os" | |
"os/signal" | |
"strconv" | |
"strings" | |
"syscall" | |
"github.com/brentp/vcfgo" | |
"github.com/brentp/xopen" | |
"gopkg.in/alecthomas/kingpin.v2" | |
) | |
type VCF struct { | |
InitWriter bool | |
VcfReader *vcfgo.Reader | |
FileReader *xopen.Reader | |
FileWriter *xopen.Writer | |
FileName string | |
} | |
func (v *VCF) Close() error { | |
if v.FileName != "os.Stdout" && v.FileWriter != nil { | |
err := v.FileWriter.Close() | |
if err != nil { | |
return err | |
} | |
} | |
if v.FileReader != nil { | |
err := v.FileReader.Close() | |
if err != nil { | |
return err | |
} | |
} | |
return nil | |
} | |
func NewVCF(vcfFile string, mode byte) *VCF { | |
var vcf *VCF | |
if mode == 'r' { | |
validateVCF(vcfFile) | |
f, err := xopen.Ropen(vcfFile) | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
rdr, err := vcfgo.NewReader(f, false) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf reader : %s", err) | |
} | |
vcf = &VCF{FileName: vcfFile, FileReader: f, VcfReader: rdr} | |
} else if mode == 'w' && vcfFile == "os.Stdout" { | |
f, err := xopen.Wopen("-") | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: "os.Stdout", FileWriter: f} | |
} else if mode == 'w' { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
f, err := xopen.Wopen(vcfFile) | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: vcfFile, FileWriter: f} | |
} else { | |
msg := "Trouble creating VCF object: '%c' unknown mode!" | |
log.Fatalf(msg, mode) | |
} | |
return vcf | |
} | |
func (v *VCF) InitVCFWriter(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
_, err := vcfgo.NewWriter(v.FileWriter, variant.Header) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf writer : %s", err) | |
} | |
if (*flush == true) || (v.FileName == "os.Stdout") { | |
v.FileWriter.Flush() | |
} | |
v.InitWriter = true | |
} | |
} | |
func (v *VCF) Write(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
v.InitVCFWriter(variant) | |
} | |
fmt.Fprintln(v.FileWriter, variant) | |
if (*flush == true) || (v.FileName == "os.Stdout") { | |
v.FileWriter.Flush() | |
} | |
} | |
var ( | |
debug = kingpin.Flag("debug", "Enable debug mode.").Bool() | |
flush = kingpin.Flag("flush", "Flush output after every variant print").Bool() | |
inVCF = kingpin.Flag("in", "Input VCF").Required().String() | |
outVCF = kingpin.Flag("out", "Output VCF").Default("os.Stdout").String() | |
upperBound = kingpin.Flag("upper", "Upper Threshold Bound").Default("0.5").Float() | |
lowerBound = kingpin.Flag("lower", "Lower Threshold Bound").Default("0.1").Float() | |
) | |
func main() { | |
kingpin.Version("0.0.1") | |
kingpin.Parse() | |
fmt.Println("The input file is: ", *inVCF) | |
fmt.Println("The output file is: ", *outVCF) | |
in := NewVCF(*inVCF, 'r') | |
defer in.Close() | |
out := NewVCF(*outVCF, 'w') | |
defer out.Close() | |
// setup signals for better "pipe"-ability of this program | |
sigs := make(chan os.Signal, 1) | |
done := make(chan bool, 1) | |
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) | |
// await for a signal, if received then notify main program we're done | |
go func() { | |
_ = <-sigs | |
done <- true | |
}() | |
// run the "main" program and notify when we're done | |
go func() { | |
log.Println("Lower Bound:", *lowerBound) | |
log.Println("Upper Bound:", *upperBound) | |
log.Println("Started filtering") | |
processVCF(in, out, *lowerBound, *upperBound) | |
log.Println("Successfully finished filtering") | |
done <- true | |
}() | |
// wait for a goroutine to tell the main program it's finished | |
<-done | |
log.Println("All Done!") | |
} | |
func processVCF(in *VCF, out *VCF, lBound, uBound float64) { | |
for i, v := 0, in.VcfReader.Read(); v != nil; i, v = i+1, in.VcfReader.Read() { | |
variant := v.(*vcfgo.Variant) | |
if ok := filterVariant(variant, lBound, uBound); ok { | |
out.Write(variant) | |
} | |
if (i%100000 == 0) && (i != 0) { | |
log.Println(i, "variants proccessed") | |
} | |
} | |
} | |
func filterVariant(variant *vcfgo.Variant, lBound, uBound float64) bool { | |
if variant.Filter != "PASS" { | |
return false | |
} | |
mafOverall := getMAF(variant, "AF") | |
mafEUR := getMAF(variant, "EUR_AF") | |
mafAFR := getMAF(variant, "AFR_AF") | |
// fmt.Printf("AF: %.4f -- EUR: %.4f -- AFR: %.4f\n", mafOverall, mafEUR, mafAFR) | |
threshold := 0.05 | |
if !((mafOverall > threshold) && | |
(mafEUR > threshold) && | |
(mafAFR > threshold)) { | |
return false | |
} | |
/* get the alternative allelic frequencies */ | |
afOverall := getFreq(variant, "AF") | |
afEUR := getFreq(variant, "EUR_AF") | |
afAFR := getFreq(variant, "AFR_AF") | |
afROW := getFreq(variant, "ROW_AF") | |
// calculate the frequency differences between the ALT and REF | |
// for each population group | |
// | |
// Also notice: | |
// | |
// abs(AF - RF) = abs(AF - (1 - AF)) = abs( 2*AF - 1) | |
diffOverall := math.Abs(2.0*afOverall - 1.0) | |
diffEUR := math.Abs(2.0*afEUR - 1.0) | |
diffAFR := math.Abs(2.0*afAFR - 1.0) | |
diffROW := math.Abs(2.0*afROW - 1.0) | |
diffs := []float64{diffOverall, diffEUR, diffAFR, diffROW} | |
decision := true | |
for _, val := range diffs { | |
if (val >= lBound) && (val < uBound) { | |
decision = true | |
} else { | |
decision = false | |
break | |
} | |
} | |
return decision | |
} | |
func getMAF(variant *vcfgo.Variant, attr string) float64 { | |
freq := getFreq(variant, attr) | |
altFreq := 1.0 - freq | |
maf := math.Min(freq, altFreq) | |
return maf | |
} | |
func getFreq(variant *vcfgo.Variant, attr string) float64 { | |
val, err := variant.Info().Get(attr) | |
if err != nil { | |
return 0.0 | |
} | |
strFreq := val.(string) | |
freq, err := strconv.ParseFloat(strFreq, 64) | |
if err != nil { | |
msg := "Couldn't parse the float number from string '%s' : %s" | |
log.Fatalf(msg, strFreq, err) | |
} | |
return freq | |
} | |
func checkExists(file string) { | |
if _, err := os.Stat(file); os.IsNotExist(err) { | |
log.Fatalf( | |
"Could not find '%s' on file system: %s", | |
file, err, | |
) | |
} | |
} | |
func validateVCF(vcfFile string) { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
checkExists(vcfFile) | |
} | |
func checkFileType(path string, suffixTypes ...string) { | |
validType := false | |
for _, suffix := range suffixTypes { | |
if strings.HasSuffix(path, suffix) { | |
validType = true | |
break | |
} | |
} | |
all := strings.Join(suffixTypes, ", ") | |
if !validType { | |
log.Fatalf("'%s' isn't a valid %s file!", | |
path, | |
all, | |
) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment