Skip to content

Instantly share code, notes, and snippets.

@andremrsantos
Created January 24, 2019 14:24
Show Gist options
  • Save andremrsantos/e1530a20ad2c78e0a027b728525d6327 to your computer and use it in GitHub Desktop.
Save andremrsantos/e1530a20ad2c78e0a027b728525d6327 to your computer and use it in GitHub Desktop.
Parsing gene mutation burden fails on -d:release
import os
import tables
import strutils
import sequtils
import algorithm
import streams
import hts
proc main() =
## Open VCF file
var vcf : VCF
assert open(vcf, paramStr(1))
## Table
var burden_table = initTable[string, seq[int32]]()
## Annotation holders
var
ann_info : string
anns : seq[string]
vals : seq[string]
gt_values : seq[int32] = newSeq[int32](vcf.n_samples)
alt_count : seq[int32] = newSeq[int32](vcf.n_samples)
genotypes : Genotypes
iter: int = 0
## loop over variant vords
for v in vcf:
if v.info.get("ANN", ann_info) != Status.OK:
continue
genotypes = genotypes(v.format, gt_values)
for ann in split(ann_info, ","):
vals = split(ann, "|")
var idx = find(v.ALT, vals[0]) + 1
var i = 0
fill(alt_count, 0)
for g in genotypes:
if value(g[0]) == idx: inc(alt_count[i])
if value(g[1]) == idx: inc(alt_count[i])
inc(i)
var key = join([vals[3], vals[2]], "\t")
discard hasKeyOrPut(burden_table, key, newSeq[int32](vcf.n_samples))
for i, val in alt_count:
burden_table[key][i] += val
inc(iter)
if iter mod 10_000 == 0: write(stderr, ".")
var header = "gene\timpact"
for sample in vcf.samples:
header.add("\t")
header.add(sample)
writeLine(stdout, header)
for k, v in burden_table:
writeLine(stdout, k, "\t", join(v, "\t"))
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment