Skip to content

Instantly share code, notes, and snippets.

@indraniel
Last active September 16, 2015 05:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save indraniel/67004ada53dfd23cb7be to your computer and use it in GitHub Desktop.
Save indraniel/67004ada53dfd23cb7be to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"log"
"math"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"github.com/brentp/vcfgo"
"github.com/brentp/xopen"
"gopkg.in/alecthomas/kingpin.v2"
)
type VCF struct {
InitWriter bool
VcfReader *vcfgo.Reader
FileReader *xopen.Reader
FileWriter *xopen.Writer
FileName string
}
func (v *VCF) Close() error {
if v.FileName != "os.Stdout" && v.FileWriter != nil {
err := v.FileWriter.Close()
if err != nil {
return err
}
}
if v.FileReader != nil {
err := v.FileReader.Close()
if err != nil {
return err
}
}
return nil
}
func NewVCF(vcfFile string, mode byte) *VCF {
var vcf *VCF
if mode == 'r' {
validateVCF(vcfFile)
f, err := xopen.Ropen(vcfFile)
if err != nil {
log.Fatalf("Couldn't open file: %s", err)
}
rdr, err := vcfgo.NewReader(f, false)
if err != nil {
log.Fatalf("Couldn't open vcf reader : %s", err)
}
vcf = &VCF{FileName: vcfFile, FileReader: f, VcfReader: rdr}
} else if mode == 'w' && vcfFile == "os.Stdout" {
f, err := xopen.Wopen("-")
if err != nil {
log.Fatalf("Couldn't open file: %s", err)
}
vcf = &VCF{InitWriter: false, FileName: "os.Stdout", FileWriter: f}
} else if mode == 'w' {
checkFileType(vcfFile, ".vcf", ".vcf.gz")
f, err := xopen.Wopen(vcfFile)
if err != nil {
log.Fatalf("Couldn't open file: %s", err)
}
vcf = &VCF{InitWriter: false, FileName: vcfFile, FileWriter: f}
} else {
msg := "Trouble creating VCF object: '%c' unknown mode!"
log.Fatalf(msg, mode)
}
return vcf
}
func (v *VCF) InitVCFWriter(variant *vcfgo.Variant) {
if v.InitWriter == false {
_, err := vcfgo.NewWriter(v.FileWriter, variant.Header)
if err != nil {
log.Fatalf("Couldn't open vcf writer : %s", err)
}
if (*flush == true) || (v.FileName == "os.Stdout") {
v.FileWriter.Flush()
}
v.InitWriter = true
}
}
func (v *VCF) Write(variant *vcfgo.Variant) {
if v.InitWriter == false {
v.InitVCFWriter(variant)
}
fmt.Fprintln(v.FileWriter, variant)
if (*flush == true) || (v.FileName == "os.Stdout") {
v.FileWriter.Flush()
}
}
var (
debug = kingpin.Flag("debug", "Enable debug mode.").Bool()
flush = kingpin.Flag("flush", "Flush output after every variant print").Bool()
inVCF = kingpin.Flag("in", "Input VCF").Required().String()
)
func main() {
kingpin.Version("0.0.1")
kingpin.Parse()
fmt.Println("#The input file is: ", *inVCF)
in := NewVCF(*inVCF, 'r')
defer in.Close()
// setup signals for better "pipe"-ability of this program
sigs := make(chan os.Signal, 1)
done := make(chan bool, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE)
// await for a signal, if received then notify main program we're done
go func() {
_ = <-sigs
done <- true
}()
// run the "main" program and notify when we're done
go func() {
log.Println("Started filtering")
processVCF(in)
log.Println("Successfully finished filtering")
done <- true
}()
// wait for a goroutine to tell the main program it's finished
<-done
log.Println("All Done!")
}
func iface(list []string) []interface{} {
vals := make([]interface{}, len(list))
for i, v := range list {
vals[i] = v
}
return vals
}
func processVCF(in *VCF) {
cols := []string{
"Chrom", "Position", "Id", "Filter",
"EUR_AF", "EUR_RF", "EUR_MAF",
"AFR_AF", "AFR_RF", "AFR_MAF",
"ROW_AF", "ROW_RF", "ROW_MAF",
"AF", "RF", "MAF",
}
headerFmt := strings.Repeat("%s\t", len(cols))
headerFmt = strings.TrimSuffix(headerFmt, "\t") + "\n"
fmt.Printf(headerFmt, iface(cols)...)
for i, v := 0, in.VcfReader.Read(); v != nil; i, v = i+1, in.VcfReader.Read() {
variant := v.(*vcfgo.Variant)
fmt.Printf("%s\t%d\t%s\t%s\t", variant.Chrom(), variant.Pos, variant.Id(), variant.Filter)
populations := []string{"EUR_AF", "AFR_AF", "ROW_AF", "AF"}
for i, attr := range populations {
altFreq := getFreq(variant, attr)
freq := 1.0 - altFreq
maf := math.Min(freq, altFreq)
fmt.Printf("%.4f\t%.4f\t%.4f", altFreq, freq, maf)
if i == (len(populations) - 1) {
fmt.Printf("\n")
} else {
fmt.Printf("\t")
}
}
}
}
func getFreq(variant *vcfgo.Variant, attr string) float64 {
val, err := variant.Info().Get(attr)
if err != nil {
return 0.0
}
strFreq := val.(string)
freq, err := strconv.ParseFloat(strFreq, 64)
if err != nil {
msg := "Couldn't parse the float number from string '%s' : %s"
log.Fatalf(msg, strFreq, err)
}
return freq
}
func checkExists(file string) {
if _, err := os.Stat(file); os.IsNotExist(err) {
log.Fatalf(
"Could not find '%s' on file system: %s",
file, err,
)
}
}
func validateVCF(vcfFile string) {
checkFileType(vcfFile, ".vcf", ".vcf.gz")
checkExists(vcfFile)
}
func checkFileType(path string, suffixTypes ...string) {
validType := false
for _, suffix := range suffixTypes {
if strings.HasSuffix(path, suffix) {
validType = true
break
}
}
all := strings.Join(suffixTypes, ", ")
if !validType {
log.Fatalf("'%s' isn't a valid %s file!",
path,
all,
)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment