Skip to content

Instantly share code, notes, and snippets.

@necrolyte2
Created November 28, 2014 21:45
Show Gist options
  • Save necrolyte2/a42afce8bb952535e0cd to your computer and use it in GitHub Desktop.
Save necrolyte2/a42afce8bb952535e0cd to your computer and use it in GitHub Desktop.
Outputs dnaplotter format from a given bam and reference file
#!/usr/bin/env python
import subprocess
import argparse
# Can test with
# python -m doctest dnaplotter.py
def count_ref_bases(reference):
'''
Counts all bases in a reference fasta file or already opened
file stream
# Tests
>>> stream = ['>id1','aaaa\\n','bbbb\\n','>id2','cccc\\n','dddd']
>>> count_ref_bases(stream)
16
'''
stream = None
# Determine if reference is a string or stream of lines
# Determins if supplied argument reference is able to be iterated over
if hasattr(reference,'__iter__'):
stream = reference
else:
stream = open(reference)
# Loop over every line
count = 0
for line in stream:
# Remove newlines
line = line.rstrip()
if line.startswith('>'): # line[0] == '>'
# Ignore ident lines
continue # Just go back to for loop line
else:
# Sum length of all non ident lines
count += len(line)
return count
def fixed_depth(bamdepth, reflength):
'''
bamdepth is stream of lines of text that are in the format
refname\tposition\tdepth
reflength is the sum of all bases in a reference file
#Tests
>>> bamdepth = ['ref1\t3\t10', 'ref1\t4\t10', 'ref1\t6\t10']
>>> reflength = 8
>>> list(fixed_depth(bamdepth, reflength))
[('ref1', 1, 0), ('ref1', 2, 0), ('ref1', 3, 10), ('ref1', 4, 10), ('ref1', 5, 0), ('ref1', 6, 10), ('ref1', 7, 0), ('ref1', 8, 0)]
'''
# Start base position at 1
curpos = 1
# Iterate over every line in bamdepth
for line in bamdepth:
# Remove newlines
line = line.rstrip()
# Split the line
ref, pos, depth = line.split()
# convert to int from string
pos = int(pos)
depth = int(depth)
# If current position and bamdepth output don't match
# then keep incrementing until they do
while pos != curpos:
yield ref, curpos, 0
curpos += 1
yield ref, curpos, depth
curpos += 1
# Now we are at the end of the bamdepth output
# but may not be at the end of the reflength
while curpos != reflength+1:
yield ref, curpos, 0
curpos += 1
def get_dnaplot(bamfile, reffile):
reflength = count_ref_bases(reffile)
# Define the shell command to run
cmd = ['samtools', 'depth', bamfile]
# Create a new process that runs our command using the shell
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
for line in fixed_depth(p.stdout, reflength):
yield line
def print_dnaplot(bamfile, reffile):
for line in get_dnaplot(bamfile, reffile):
print "{0} {1}".format(line[1], line[2])
def parse_args():
parser = argparse.ArgumentParser(
'''Get dnaplot output file to run in dnaplotter'''
)
# Adds required argument so user has to supply a bam file
parser.add_argument(
'bam',
help='Bam file to get depth data from'
)
# Adds required argument so user has to supply reference file
parser.add_argument(
'reference',
help='Reference file to get genome length from'
)
return parser.parse_args()
def main():
args = parse_args()
print_dnaplot(args.bam, args.reference)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment