Skip to content

Instantly share code, notes, and snippets.

@indraniel
Created September 11, 2015 02:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save indraniel/9439b0dc461e7c2a6596 to your computer and use it in GitHub Desktop.
Save indraniel/9439b0dc461e7c2a6596 to your computer and use it in GitHub Desktop.
A golang VCF file filtering template
package main
import (
"fmt"
"log"
"os"
"os/signal"
"strings"
"syscall"
"github.com/brentp/vcfgo"
"github.com/brentp/xopen"
"gopkg.in/alecthomas/kingpin.v2"
)
type VCF struct {
InitWriter bool
VcfReader *vcfgo.Reader
FileReader *xopen.Reader
FileWriter *xopen.Writer
FileName string
}
// Close releases the resources held by v and returns the first error
// encountered, or nil.
//
// A writer bound to standard output (FileName == "os.Stdout") is flushed but
// not closed, so buffered records are not lost and the process' stdout stays
// usable. Any other writer is closed. The reader, if present, is always
// closed even when closing the writer failed, so a writer error cannot leak
// the input handle.
func (v *VCF) Close() error {
	var firstErr error

	if v.FileWriter != nil {
		if v.FileName == "os.Stdout" {
			// Previously this case was skipped entirely, which dropped
			// whatever was still sitting in the write buffer.
			v.FileWriter.Flush()
		} else if err := v.FileWriter.Close(); err != nil {
			firstErr = err
		}
	}

	if v.FileReader != nil {
		if err := v.FileReader.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}

	return firstErr
}
// NewVCF opens vcfFile and returns a VCF handle for it.
//
// mode selects the direction:
//   - 'r': vcfFile is validated (suffix and existence), opened for reading,
//     and wrapped in a vcfgo reader.
//   - 'w': if vcfFile is the literal "os.Stdout" the handle writes to
//     standard output; otherwise vcfFile must end in ".vcf" or ".vcf.gz" and
//     is opened for writing.
//
// Any failure, including an unknown mode, terminates the program through
// log.Fatalf, so the returned pointer is non-nil whenever the call returns.
func NewVCF(vcfFile string, mode byte) *VCF {
	switch {
	case mode == 'r':
		validateVCF(vcfFile)
		f, err := xopen.Ropen(vcfFile)
		if err != nil {
			log.Fatalf("Couldn't open file: %s", err)
		}
		rdr, err := vcfgo.NewReader(f, false)
		if err != nil {
			log.Fatalf("Couldn't open vcf reader : %s", err)
		}
		return &VCF{FileName: vcfFile, FileReader: f, VcfReader: rdr}

	case mode == 'w' && vcfFile == "os.Stdout":
		// "-" is the path this file has always handed to xopen.Wopen for
		// standard output.
		f, err := xopen.Wopen("-")
		if err != nil {
			log.Fatalf("Couldn't open file: %s", err)
		}
		return &VCF{InitWriter: false, FileName: "os.Stdout", FileWriter: f}

	case mode == 'w':
		checkFileType(vcfFile, ".vcf", ".vcf.gz")
		// Open the requested path. This branch used to open "-" as well,
		// so a named output file was never created and the data went to
		// stdout instead.
		f, err := xopen.Wopen(vcfFile)
		if err != nil {
			log.Fatalf("Couldn't open file: %s", err)
		}
		return &VCF{InitWriter: false, FileName: vcfFile, FileWriter: f}

	default:
		msg := "Trouble creating VCF object: '%c' unknown mode!"
		log.Fatalf(msg, mode)
		return nil // not reached: log.Fatalf exits the process
	}
}
// InitVCFWriter performs the one-time writer setup for v using the header
// attached to variant. It builds a vcfgo writer over v.FileWriter (the
// returned writer itself is discarded; presumably the call is made for its
// header-emitting effect, confirm against vcfgo), flushes when the --flush
// flag is set, and marks v as initialised. Subsequent calls are no-ops.
// A failure to build the writer is fatal.
func (v *VCF) InitVCFWriter(variant *vcfgo.Variant) {
	if v.InitWriter {
		return
	}

	if _, err := vcfgo.NewWriter(v.FileWriter, variant.Header); err != nil {
		log.Fatalf("Couldn't open vcf writer : %s", err)
	}
	if *flush {
		v.FileWriter.Flush()
	}
	v.InitWriter = true
}
// Write prints variant as one line on v.FileWriter, running the one-time
// writer setup first if it has not happened yet. When the --flush flag is
// set the writer is flushed after the line is printed.
func (v *VCF) Write(variant *vcfgo.Variant) {
	if !v.InitWriter {
		v.InitVCFWriter(variant)
	}

	fmt.Fprintln(v.FileWriter, variant)

	if !*flush {
		return
	}
	v.FileWriter.Flush()
}
// Command-line flags, registered with kingpin in declaration order.
var (
// debug is parsed but not read by any code visible in this file.
debug = kingpin.Flag("debug", "Enable debug mode.").Bool()
// flush makes InitVCFWriter and Write flush the output writer after each
// piece of output instead of relying on buffering.
flush = kingpin.Flag("flush", "Flush output after every variant print").Bool()
// inVCF is the mandatory path of the VCF file to read.
inVCF = kingpin.Flag("in", "Input VCF").Required().String()
// outVCF is the path to write to; the default "os.Stdout" is the sentinel
// NewVCF recognises as "write to standard output".
outVCF = kingpin.Flag("out", "Output VCF").Default("os.Stdout").String()
)
// main parses the command line, opens the input and output VCF handles, and
// streams variants from one to the other until the input is exhausted or a
// SIGINT/SIGTERM/SIGPIPE arrives, whichever comes first.
func main() {
	kingpin.Version("0.0.1")
	kingpin.Parse()

	// Diagnostics go to stderr so they never end up inside the VCF stream
	// when the output is standard output (the default).
	fmt.Fprintln(os.Stderr, "The input file is: ", *inVCF)
	fmt.Fprintln(os.Stderr, "The output file is: ", *outVCF)

	in := NewVCF(*inVCF, 'r')
	defer func() {
		if err := in.Close(); err != nil {
			log.Printf("closing input: %s", err)
		}
	}()

	// The writer must be built from --out; it used to be built from --in,
	// which ignored the requested destination.
	out := NewVCF(*outVCF, 'w')
	defer func() {
		if err := out.Close(); err != nil {
			log.Printf("closing output: %s", err)
		}
	}()

	// Set up signal handling for better "pipe"-ability of this program.
	sigs := make(chan os.Signal, 1)
	done := make(chan bool, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE)

	// Wait for a signal; once one is received, tell main we are done.
	go func() {
		<-sigs
		done <- true
	}()

	// Run the actual filtering and tell main when it has finished.
	go func() {
		processVCF(in, out)
		done <- true
	}()

	// Block until either goroutine reports completion. When a signal wins,
	// the filtering goroutine may still be running while the deferred
	// Close calls execute; the process is exiting at that point.
	<-done
	log.Println("All Done!")
}
// processVCF pulls variants from in one at a time until the reader returns
// nil, and writes to out every variant that filterVariant accepts.
func processVCF(in *VCF, out *VCF) {
	for rec := in.VcfReader.Read(); rec != nil; rec = in.VcfReader.Read() {
		if !filterVariant(rec) {
			continue
		}
		out.Write(rec)
	}
}
// filterVariant reports whether variant should be written to the output.
// In this template it accepts every variant; replace the body with real
// filtering criteria.
func filterVariant(variant *vcfgo.Variant) bool {
	const keep = true
	return keep
}
// checkExists terminates the program when os.Stat reports that file does
// not exist. Any other outcome of the stat call (success or a different
// error) lets the function return normally.
func checkExists(file string) {
	_, statErr := os.Stat(file)
	if !os.IsNotExist(statErr) {
		return
	}
	log.Fatalf("Could not find '%s' on file system: %s", file, statErr)
}
// validateVCF checks that vcfFile carries a VCF suffix (".vcf" or ".vcf.gz")
// and then that it exists on disk. Either check exits the program on
// failure; the suffix check runs first.
func validateVCF(vcfFile string) {
	vcfSuffixes := []string{".vcf", ".vcf.gz"}
	checkFileType(vcfFile, vcfSuffixes...)
	checkExists(vcfFile)
}
// checkFileType returns normally when path ends with at least one of
// suffixTypes. Otherwise it terminates the program with a message listing
// the accepted suffixes.
func checkFileType(path string, suffixTypes ...string) {
	for i := range suffixTypes {
		if strings.HasSuffix(path, suffixTypes[i]) {
			return
		}
	}
	log.Fatalf("'%s' isn't a valid %s file!", path, strings.Join(suffixTypes, ", "))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment