Skip to content

Instantly share code, notes, and snippets.

@winni2k
Last active March 30, 2020 15:31
Show Gist options
  • Save winni2k/7850d00bc8fec99aadd8087479ef9697 to your computer and use it in GitHub Desktop.
Save winni2k/7850d00bc8fec99aadd8087479ef9697 to your computer and use it in GitHub Desktop.
This is a script that sums up the bases at a position or region weighted by their base quality
import math
import yaml
import pysam
def main(samfile_name, chrom, start, end):
"""
:param samfile_name:
:param chrom: string
:param start: one-based, closed-closed interval
:param end: one-based, closed-closed interval
:return:
"""
samfile = pysam.AlignmentFile(samfile_name, "rb")
start = int(start) - 1
end = int(end)
for pileupcolumn in samfile.pileup(chrom, start, end):
if pileupcolumn.pos < start or end <= pileupcolumn.pos:
continue
pileupcolumn.set_min_base_quality(0)
meta = {
"position": f"{chrom}:{pileupcolumn.pos + 1}",
"coverage": len(pileupcolumn.get_query_sequences()),
}
allelic_depth = {"A": 0, "C": 0, "G": 0, "T": 0}
for base, qual in zip(
pileupcolumn.get_query_sequences(), pileupcolumn.get_query_qualities()
):
base = base.upper()
if base not in allelic_depth:
continue
real_qual = math.pow(10, (-qual / 10))
allelic_depth[base] += 1 - real_qual
for ad_base in allelic_depth:
if ad_base != base:
allelic_depth[ad_base] += real_qual / 3
meta["allelic_depth"] = allelic_depth
print(yaml.dump(meta))
print("\n")
samfile.close()
if __name__ == "__main__":
from sys import argv
main(samfile_name=argv[1], chrom=argv[2], start=argv[3], end=argv[4])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment