Skip to content

Instantly share code, notes, and snippets.

@srynobio
Created January 4, 2019 23:48
Show Gist options
  • Save srynobio/04c112e289a413c3e05b0cb2d071a659 to your computer and use it in GitHub Desktop.
Save srynobio/04c112e289a413c3e05b0cb2d071a659 to your computer and use it in GitHub Desktop.
package main
import (
"crypto/sha1"
"encoding/base64"
"fmt"
"log"
"runtime"
"sync"
arg "github.com/alexflint/go-arg"
"github.com/brentp/vcfgo"
"github.com/brentp/xopen"
)
// digest returns the SHA-1 checksum of bv encoded as a standard,
// padded base64 string.
func digest(bv []byte) string {
	sum := sha1.Sum(bv)
	return base64.StdEncoding.EncodeToString(sum[:])
}
// builder reads every record of the VCF at path file and sends one
// digest per record on dchan.
//
// Each record is rendered as "chromosome:start:end:reference:alt" and that
// string is hashed with digest. Records are hashed and sent from the reading
// loop itself rather than from one goroutine per record: with an unbuffered
// dchan, a goroutine per record lets blocked senders accumulate without
// bound when the receiver is slower than the reader, and it scrambles the
// per-file output order. Sending inline applies back-pressure to the reader
// and keeps digests in file order.
//
// wg is kept for signature compatibility and is not used here; the caller
// is responsible for any Add/Done accounting around the builder call.
// builder never closes dchan.
//
// Failing to open or parse the file panics via echeck.
func builder(file string, wg *sync.WaitGroup, dchan chan string) {
	// Open the VCF file.
	f, err := xopen.Ropen(file)
	echeck("Can't open VCF file.", err)
	defer f.Close()

	r, err := vcfgo.NewReader(f, false)
	echeck("Can't access vcf file.", err)
	defer r.Close()

	for {
		record := r.Read()
		if record == nil {
			// A nil record ends the stream.
			break
		}
		site := fmt.Sprintf("%s:%d:%d:%s:%s", record.Chromosome, record.Start(), record.End(), record.Reference, record.Alt())
		dchan <- digest([]byte(site))
	}
}
// echeck is a fail-fast error guard. A nil e is a no-op; otherwise m is
// written through the standard logger and the function panics with e.
func echeck(m string, e error) {
	if e == nil {
		return
	}
	log.Println(m)
	panic(e)
}
// main parses the command line, requires exactly two VCF paths, runs one
// builder per file concurrently, and prints every digest the builders send.
//
// Shutdown protocol:
//   - every builder runs inside a wrapper goroutine that calls wg.Done when
//     builder returns, so the WaitGroup tracks the builders' real lifetime
//     (a defer inside the start-up loop would only fire when main returns);
//   - a closer goroutine waits on the WaitGroup and only then closes dchan,
//     so no sender can hit a closed channel;
//   - the closer is started before main ranges over dchan, which lets the
//     range loop terminate once all digests have been delivered.
func main() {
	var args struct {
		VCF  []string `help:"VCF file to collect concordance from. Space separated."`
		CPUS int      `help:"Number of CPUS workers to allow."`
	}
	args.CPUS = runtime.NumCPU()
	arg.MustParse(&args)

	// Check for two files.
	if len(args.VCF) != 2 {
		log.Panic("Two VCF files required.")
	}

	// Honour the CPUS option, which was previously parsed but ignored.
	// GOMAXPROCS leaves the current setting untouched for values below 1.
	runtime.GOMAXPROCS(args.CPUS)

	// Create wg and result channel.
	var wg sync.WaitGroup
	dchan := make(chan string)

	// Start the workers.
	for _, file := range args.VCF {
		wg.Add(1)
		go func(path string) {
			defer wg.Done()
			builder(path, &wg, dchan)
		}(file)
	}

	// Close the channel only after every sender has finished.
	go func() {
		wg.Wait()
		close(dchan)
	}()

	// Get values from the channel until it is closed.
	for income := range dchan {
		fmt.Println("income: ", income)
	}
	fmt.Println("out!!")
}
// You can create another goroutine, outside the loop, that
// waits on the WaitGroup and then closes the channel.
// Also, range over the channel outside the loop that starts the workers.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment