Skip to content

Instantly share code, notes, and snippets.

@ngcrawford
Last active December 24, 2015 12:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ngcrawford/6797150 to your computer and use it in GitHub Desktop.
Save ngcrawford/6797150 to your computer and use it in GitHub Desktop.
Simple script to calculate base percentages from a BAM file.
import pysam
import argparse
from collections import Counter
def get_args():
"""Parse sys.argv"""
parser = argparse.ArgumentParser()
parser.add_argument('--bam',
required=True,
help='Path to BAM file.')
parser.add_argument('output',
type=argparse.FileType('w'),
help="Path to BAM file. Default is STOUT")
args = parser.parse_args()
return args
args = get_args()
samfile = pysam.Samfile( args.bam, "rb" )
iter = samfile.pileup()
header = ['chrm', 'pos', 'A-count','C-count','T-count','G-count', 'A-proportion','C-proportion','T-proportion','G-proportion']
args.output.write('\t'.join(header) + "\n")
for c, p in enumerate(iter):
bases = [pileupread.alignment.seq[pileupread.qpos] for pileupread in p.pileups]
base_counts = Counter(bases)
total = float(sum(base_counts.values()))
final_line = [ samfile.getrname(p.pileups[0].alignment.rname), p.pos]
final_line.extend( [base_counts[b] for b in ('A','C','T','G')] )
final_line.extend( ["{0:.2f}".format(round(base_counts[b]/total,2)) for b in ('A','C','T','G')] )
final_formated_line = '\t'.join(map(str, final_line))
args.output.write(final_formated_line + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment