Skip to content

Instantly share code, notes, and snippets.

@brentp
Last active November 13, 2018 19:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brentp/d251b7376961a72143f5efd5b6b66fec to your computer and use it in GitHub Desktop.
Save brentp/d251b7376961a72143f5efd5b6b66fec to your computer and use it in GitHub Desktop.
import hts
import strformat
import os
import osproc
import tables
var
vcf_path = commandLineParams()[0]
fasta_path = commandLineParams()[1]
vcf: VCF
ovcf: VCF
ofa: File
reads = "bwa-hg002-dups.fa"
if not open(vcf, vcf_path):
quit "couldnt open vcf:" & vcf_path
if not open(ovcf, "-", mode="w"):
quit "couldnt open vcf to stdout"
if not open(ofa, reads, fmWrite):
quit "couldnt open reads file"
ovcf.header= vcf.header
var svtype: string
for v in vcf:
if v.info.get("SVTYPE", svtype) != Status.OK:
quit "couldn't get SVTYPE"
if svtype != "INS": continue
#assert ovcf.write_variant(v)
ofa.write(&">{v.CHROM}_{v.start}_{v.stop}_{v.ID}\n{v.ALT[0]}\n")
ofa.close()
var prefix = "bwa-hg002-dups"
assert 0 == execCmd(&"""bwa mem -R "@RG\tID:hg002\tSM:hg002" -t 3 {fasta_path} {reads} | samtools sort -o {prefix}.bam && samtools index {prefix}.bam""")
var bam:Bam
open(bam, prefix & ".bam")
var mappings = initTable[string, Record](32768)
for aln in bam:
# TODO: handle SA.
if aln.flag.secondary or aln.flag.supplementary or aln.flag.unmapped: continue
if aln.mapping_quality == 0: continue
var nm = tag[int](aln, "NM")
if not nm.isNone and nm.get.int > 20: continue
mappings[aln.qname] = aln.copy()
vcf.close()
if not open(vcf, vcf_path):
quit "couldnt reopen vcf:" & vcf_path
discard ovcf.write_header
var dup = "DUP"
var svt = ""
for v in vcf:
var key = &"{v.CHROM}_{v.start}_{v.stop}_{v.ID}"
if v.info.get("SVTYPE", svt) != Status.OK:
quit "couldn't get SVTYPE"
if key in mappings and svt == "INS":
var aln = mappings[key]
#stderr.write_line $aln.start, " ", $(v.start + 1)
var L = v.ALT[0].len.float64
var alnL = aln.stop - aln.start
if abs(aln.start - v.start) < int(0.05 * L) and abs(alnL - L.int) < int(0.05 * L):
if v.info.set("SVTYPE", dup) != Status.OK:
quit "couldn't set type to dup"
var stop = @[int32(v.start + v.ALT[0].len + 1)]
if v.info.set("END", stop) != Status.OK:
quit "couldn't set end of dup"
assert ovcf.write_variant(v)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment