Last active
September 16, 2015 05:06
-
-
Save indraniel/67004ada53dfd23cb7be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"math" | |
"os" | |
"os/signal" | |
"strconv" | |
"strings" | |
"syscall" | |
"github.com/brentp/vcfgo" | |
"github.com/brentp/xopen" | |
"gopkg.in/alecthomas/kingpin.v2" | |
) | |
type VCF struct { | |
InitWriter bool | |
VcfReader *vcfgo.Reader | |
FileReader *xopen.Reader | |
FileWriter *xopen.Writer | |
FileName string | |
} | |
func (v *VCF) Close() error { | |
if v.FileName != "os.Stdout" && v.FileWriter != nil { | |
err := v.FileWriter.Close() | |
if err != nil { | |
return err | |
} | |
} | |
if v.FileReader != nil { | |
err := v.FileReader.Close() | |
if err != nil { | |
return err | |
} | |
} | |
return nil | |
} | |
func NewVCF(vcfFile string, mode byte) *VCF { | |
var vcf *VCF | |
if mode == 'r' { | |
validateVCF(vcfFile) | |
f, err := xopen.Ropen(vcfFile) | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
rdr, err := vcfgo.NewReader(f, false) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf reader : %s", err) | |
} | |
vcf = &VCF{FileName: vcfFile, FileReader: f, VcfReader: rdr} | |
} else if mode == 'w' && vcfFile == "os.Stdout" { | |
f, err := xopen.Wopen("-") | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: "os.Stdout", FileWriter: f} | |
} else if mode == 'w' { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
f, err := xopen.Wopen(vcfFile) | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: vcfFile, FileWriter: f} | |
} else { | |
msg := "Trouble creating VCF object: '%c' unknown mode!" | |
log.Fatalf(msg, mode) | |
} | |
return vcf | |
} | |
func (v *VCF) InitVCFWriter(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
_, err := vcfgo.NewWriter(v.FileWriter, variant.Header) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf writer : %s", err) | |
} | |
if (*flush == true) || (v.FileName == "os.Stdout") { | |
v.FileWriter.Flush() | |
} | |
v.InitWriter = true | |
} | |
} | |
func (v *VCF) Write(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
v.InitVCFWriter(variant) | |
} | |
fmt.Fprintln(v.FileWriter, variant) | |
if (*flush == true) || (v.FileName == "os.Stdout") { | |
v.FileWriter.Flush() | |
} | |
} | |
var ( | |
debug = kingpin.Flag("debug", "Enable debug mode.").Bool() | |
flush = kingpin.Flag("flush", "Flush output after every variant print").Bool() | |
inVCF = kingpin.Flag("in", "Input VCF").Required().String() | |
) | |
func main() { | |
kingpin.Version("0.0.1") | |
kingpin.Parse() | |
fmt.Println("#The input file is: ", *inVCF) | |
in := NewVCF(*inVCF, 'r') | |
defer in.Close() | |
// setup signals for better "pipe"-ability of this program | |
sigs := make(chan os.Signal, 1) | |
done := make(chan bool, 1) | |
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) | |
// await for a signal, if received then notify main program we're done | |
go func() { | |
_ = <-sigs | |
done <- true | |
}() | |
// run the "main" program and notify when we're done | |
go func() { | |
log.Println("Started filtering") | |
processVCF(in) | |
log.Println("Successfully finished filtering") | |
done <- true | |
}() | |
// wait for a goroutine to tell the main program it's finished | |
<-done | |
log.Println("All Done!") | |
} | |
func iface(list []string) []interface{} { | |
vals := make([]interface{}, len(list)) | |
for i, v := range list { | |
vals[i] = v | |
} | |
return vals | |
} | |
func processVCF(in *VCF) { | |
cols := []string{ | |
"Chrom", "Position", "Id", "Filter", | |
"EUR_AF", "EUR_RF", "EUR_MAF", | |
"AFR_AF", "AFR_RF", "AFR_MAF", | |
"ROW_AF", "ROW_RF", "ROW_MAF", | |
"AF", "RF", "MAF", | |
} | |
headerFmt := strings.Repeat("%s\t", len(cols)) | |
headerFmt = strings.TrimSuffix(headerFmt, "\t") + "\n" | |
fmt.Printf(headerFmt, iface(cols)...) | |
for i, v := 0, in.VcfReader.Read(); v != nil; i, v = i+1, in.VcfReader.Read() { | |
variant := v.(*vcfgo.Variant) | |
fmt.Printf("%s\t%d\t%s\t%s\t", variant.Chrom(), variant.Pos, variant.Id(), variant.Filter) | |
populations := []string{"EUR_AF", "AFR_AF", "ROW_AF", "AF"} | |
for i, attr := range populations { | |
altFreq := getFreq(variant, attr) | |
freq := 1.0 - altFreq | |
maf := math.Min(freq, altFreq) | |
fmt.Printf("%.4f\t%.4f\t%.4f", altFreq, freq, maf) | |
if i == (len(populations) - 1) { | |
fmt.Printf("\n") | |
} else { | |
fmt.Printf("\t") | |
} | |
} | |
} | |
} | |
func getFreq(variant *vcfgo.Variant, attr string) float64 { | |
val, err := variant.Info().Get(attr) | |
if err != nil { | |
return 0.0 | |
} | |
strFreq := val.(string) | |
freq, err := strconv.ParseFloat(strFreq, 64) | |
if err != nil { | |
msg := "Couldn't parse the float number from string '%s' : %s" | |
log.Fatalf(msg, strFreq, err) | |
} | |
return freq | |
} | |
func checkExists(file string) { | |
if _, err := os.Stat(file); os.IsNotExist(err) { | |
log.Fatalf( | |
"Could not find '%s' on file system: %s", | |
file, err, | |
) | |
} | |
} | |
func validateVCF(vcfFile string) { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
checkExists(vcfFile) | |
} | |
func checkFileType(path string, suffixTypes ...string) { | |
validType := false | |
for _, suffix := range suffixTypes { | |
if strings.HasSuffix(path, suffix) { | |
validType = true | |
break | |
} | |
} | |
all := strings.Join(suffixTypes, ", ") | |
if !validType { | |
log.Fatalf("'%s' isn't a valid %s file!", | |
path, | |
all, | |
) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment