Last active
January 12, 2019 02:33
-
-
Save kmhernan/fdc5cef7172bc1e6a5503c0a68c2ee28 to your computer and use it in GitHub Desktop.
Proof of concept of me playing around with hts-nim via implementing samtools flagstat. This is not production. This was just late night playing around.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Proof-of-concept test of hts-nim flagstat. Not a production piece. | |
# Nim Compiler Version 0.19.0 | |
# nim c -d:realease | |
import hts | |
import strutils | |
type | |
CountArray = array[0..1, int] | |
proc main(): int = | |
# Just hardcoding inputs and the count arrays cause lazy | |
var | |
bam: Bam | |
ifil = "file.bam" | |
totals: CountArray = [0, 0] | |
secondary: CountArray = [0, 0] | |
supplementary: CountArray = [0, 0] | |
duplicates: CountArray = [0, 0] | |
mapped: CountArray = [0, 0] | |
paired: CountArray = [0, 0] | |
first: CountArray = [0, 0] | |
second: CountArray = [0, 0] | |
proper: CountArray = [0, 0] | |
singleton: CountArray = [0, 0] | |
pair_map: CountArray = [0, 0] | |
mate_chrom: CountArray = [0, 0] | |
mate_chrom_hq: CountArray = [0, 0] | |
mapq_cutoff: uint8 = 5 | |
idx = 0 | |
# open bam | |
open(bam, ifil, threads=4) | |
# no checking anything here y'all, just diving in | |
for aln in bam: | |
# Basically following same ifelse loop of samtools flagstat | |
if aln.flag.qcfail: | |
idx = 1 | |
totals[idx] += 1 | |
if aln.flag.secondary: | |
secondary[idx] += 1 | |
elif aln.flag.supplementary: | |
supplementary[idx] += 1 | |
elif aln.flag.pair: | |
paired[idx] += 1 | |
if aln.flag.proper_pair and not aln.flag.unmapped: | |
proper[idx] += 1 | |
if aln.flag.read1: | |
first[idx] += 1 | |
if aln.flag.read2: | |
second[idx] += 1 | |
if aln.flag.mate_unmapped and not aln.flag.unmapped: | |
singleton[idx] += 1 | |
if not aln.flag.mate_unmapped and not aln.flag.unmapped: | |
pair_map[idx] += 1 | |
if aln.mate_tid != aln.tid: | |
mate_chrom[idx] += 1 | |
if aln.mapping_quality >= mapq_cutoff: | |
mate_chrom_hq[idx] += 1 | |
if not aln.flag.unmapped: | |
mapped[idx] += 1 | |
if aln.flag.dup: | |
duplicates[idx] += 1 | |
# Lazy printing | |
echo format("$1 + $2 in total (QC-passed reads + QC-failed reads)", totals[0], totals[1]) | |
echo format("$1 + $2 secondary", secondary[0], secondary[1]) | |
echo format("$1 + $2 supplementary", supplementary[0], supplementary[1]) | |
echo format("$1 + $2 duplicates", duplicates[0], duplicates[1]) | |
echo format("$1 + $2 mapped", mapped[0], mapped[1]) | |
echo format("$1 + $2 paired in sequencing", paired[0], paired[1]) | |
echo format("$1 + $2 read1", first[0], first[1]) | |
echo format("$1 + $2 read2", second[0], second[1]) | |
echo format("$1 + $2 properly paired", proper[0], proper[1]) | |
echo format("$1 + $2 with itself and mate mapped", pair_map[0], pair_map[1]) | |
echo format("$1 + $2 singletons", singleton[0], singleton[1]) | |
echo format("$1 + $2 with mate mapped to a different chr", mate_chrom[0], mate_chrom[1]) | |
echo format("$1 + $2 with mate mapped to a different chr (mapQ>=5)", mate_chrom_hq[0], mate_chrom_hq[1]) | |
return 0 | |
when isMainModule: | |
discard main() |
also, then when you initalize the vars, you can just use:
var
totals: CountArray
instead of:
var
totals: CountArray = [0, 0]
Hey thanks for the comments. I honestly didn’t expect it to leave that folder it was in when I was just playing around but I didn’t know it would initialize to 0? So good to know.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've not seen:
I would just use:
this should make no difference when compiled but is more idiomatic at least.