Created
September 11, 2015 02:29
-
-
Save indraniel/9439b0dc461e7c2a6596 to your computer and use it in GitHub Desktop.
A golang VCF file filtering template
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"os" | |
"os/signal" | |
"strings" | |
"syscall" | |
"github.com/brentp/vcfgo" | |
"github.com/brentp/xopen" | |
"gopkg.in/alecthomas/kingpin.v2" | |
) | |
type VCF struct { | |
InitWriter bool | |
VcfReader *vcfgo.Reader | |
FileReader *xopen.Reader | |
FileWriter *xopen.Writer | |
FileName string | |
} | |
func (v *VCF) Close() error { | |
if v.FileName != "os.Stdout" && v.FileWriter != nil { | |
err := v.FileWriter.Close() | |
if err != nil { | |
return err | |
} | |
} | |
if v.FileReader != nil { | |
err := v.FileReader.Close() | |
if err != nil { | |
return err | |
} | |
} | |
return nil | |
} | |
func NewVCF(vcfFile string, mode byte) *VCF { | |
var vcf *VCF | |
if mode == 'r' { | |
validateVCF(vcfFile) | |
f, err := xopen.Ropen(vcfFile) | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
rdr, err := vcfgo.NewReader(f, false) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf reader : %s", err) | |
} | |
vcf = &VCF{FileName: vcfFile, FileReader: f, VcfReader: rdr} | |
} else if mode == 'w' && vcfFile == "os.Stdout" { | |
f, err := xopen.Wopen("-") | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: "os.Stdout", FileWriter: f} | |
} else if mode == 'w' { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
f, err := xopen.Wopen("-") | |
if err != nil { | |
log.Fatalf("Couldn't open file: %s", err) | |
} | |
vcf = &VCF{InitWriter: false, FileName: vcfFile, FileWriter: f} | |
} else { | |
msg := "Trouble creating VCF object: '%c' unknown mode!" | |
log.Fatalf(msg, mode) | |
} | |
return vcf | |
} | |
func (v *VCF) InitVCFWriter(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
_, err := vcfgo.NewWriter(v.FileWriter, variant.Header) | |
if err != nil { | |
log.Fatalf("Couldn't open vcf writer : %s", err) | |
} | |
if *flush == true { | |
v.FileWriter.Flush() | |
} | |
v.InitWriter = true | |
} | |
} | |
func (v *VCF) Write(variant *vcfgo.Variant) { | |
if v.InitWriter == false { | |
v.InitVCFWriter(variant) | |
} | |
fmt.Fprintln(v.FileWriter, variant) | |
if *flush == true { | |
v.FileWriter.Flush() | |
} | |
} | |
var ( | |
debug = kingpin.Flag("debug", "Enable debug mode.").Bool() | |
flush = kingpin.Flag("flush", "Flush output after every variant print").Bool() | |
inVCF = kingpin.Flag("in", "Input VCF").Required().String() | |
outVCF = kingpin.Flag("out", "Output VCF").Default("os.Stdout").String() | |
) | |
func main() { | |
kingpin.Version("0.0.1") | |
kingpin.Parse() | |
fmt.Println("The input file is: ", *inVCF) | |
fmt.Println("The output file is: ", *outVCF) | |
in := NewVCF(*inVCF, 'r') | |
defer in.Close() | |
out := NewVCF(*inVCF, 'w') | |
defer out.Close() | |
// setup signals for better "pipe"-ability of this program | |
sigs := make(chan os.Signal, 1) | |
done := make(chan bool, 1) | |
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE) | |
// await for a signal, if received then notify main program we're done | |
go func() { | |
_ = <-sigs | |
done <- true | |
}() | |
// run the "main" program and notify when we're done | |
go func() { | |
processVCF(in, out) | |
done <- true | |
}() | |
// wait for a goroutine to tell the main program it's finished | |
<-done | |
log.Println("All Done!") | |
} | |
func processVCF(in *VCF, out *VCF) { | |
variant := in.VcfReader.Read() | |
for variant != nil { | |
if ok := filterVariant(variant); ok { | |
out.Write(variant) | |
} | |
variant = in.VcfReader.Read() | |
} | |
} | |
func filterVariant(variant *vcfgo.Variant) bool { | |
return true | |
} | |
func checkExists(file string) { | |
if _, err := os.Stat(file); os.IsNotExist(err) { | |
log.Fatalf( | |
"Could not find '%s' on file system: %s", | |
file, err, | |
) | |
} | |
} | |
func validateVCF(vcfFile string) { | |
checkFileType(vcfFile, ".vcf", ".vcf.gz") | |
checkExists(vcfFile) | |
} | |
func checkFileType(path string, suffixTypes ...string) { | |
validType := false | |
for _, suffix := range suffixTypes { | |
if strings.HasSuffix(path, suffix) { | |
validType = true | |
break | |
} | |
} | |
all := strings.Join(suffixTypes, ", ") | |
if !validType { | |
log.Fatalf("'%s' isn't a valid %s file!", | |
path, | |
all, | |
) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment