Created
November 28, 2014 21:45
-
-
Save necrolyte2/a42afce8bb952535e0cd to your computer and use it in GitHub Desktop.
Outputs dnaplotter format from a given bam and reference file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import subprocess | |
import argparse | |
# Can test with | |
# python -m doctest dnaplotter.py | |
def count_ref_bases(reference): | |
''' | |
Counts all bases in a reference fasta file or already opened | |
file stream | |
# Tests | |
>>> stream = ['>id1','aaaa\\n','bbbb\\n','>id2','cccc\\n','dddd'] | |
>>> count_ref_bases(stream) | |
16 | |
''' | |
stream = None | |
# Determine if reference is a string or stream of lines | |
# Determins if supplied argument reference is able to be iterated over | |
if hasattr(reference,'__iter__'): | |
stream = reference | |
else: | |
stream = open(reference) | |
# Loop over every line | |
count = 0 | |
for line in stream: | |
# Remove newlines | |
line = line.rstrip() | |
if line.startswith('>'): # line[0] == '>' | |
# Ignore ident lines | |
continue # Just go back to for loop line | |
else: | |
# Sum length of all non ident lines | |
count += len(line) | |
return count | |
def fixed_depth(bamdepth, reflength): | |
''' | |
bamdepth is stream of lines of text that are in the format | |
refname\tposition\tdepth | |
reflength is the sum of all bases in a reference file | |
#Tests | |
>>> bamdepth = ['ref1\t3\t10', 'ref1\t4\t10', 'ref1\t6\t10'] | |
>>> reflength = 8 | |
>>> list(fixed_depth(bamdepth, reflength)) | |
[('ref1', 1, 0), ('ref1', 2, 0), ('ref1', 3, 10), ('ref1', 4, 10), ('ref1', 5, 0), ('ref1', 6, 10), ('ref1', 7, 0), ('ref1', 8, 0)] | |
''' | |
# Start base position at 1 | |
curpos = 1 | |
# Iterate over every line in bamdepth | |
for line in bamdepth: | |
# Remove newlines | |
line = line.rstrip() | |
# Split the line | |
ref, pos, depth = line.split() | |
# convert to int from string | |
pos = int(pos) | |
depth = int(depth) | |
# If current position and bamdepth output don't match | |
# then keep incrementing until they do | |
while pos != curpos: | |
yield ref, curpos, 0 | |
curpos += 1 | |
yield ref, curpos, depth | |
curpos += 1 | |
# Now we are at the end of the bamdepth output | |
# but may not be at the end of the reflength | |
while curpos != reflength+1: | |
yield ref, curpos, 0 | |
curpos += 1 | |
def get_dnaplot(bamfile, reffile): | |
reflength = count_ref_bases(reffile) | |
# Define the shell command to run | |
cmd = ['samtools', 'depth', bamfile] | |
# Create a new process that runs our command using the shell | |
p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | |
for line in fixed_depth(p.stdout, reflength): | |
yield line | |
def print_dnaplot(bamfile, reffile): | |
for line in get_dnaplot(bamfile, reffile): | |
print "{0} {1}".format(line[1], line[2]) | |
def parse_args(): | |
parser = argparse.ArgumentParser( | |
'''Get dnaplot output file to run in dnaplotter''' | |
) | |
# Adds required argument so user has to supply a bam file | |
parser.add_argument( | |
'bam', | |
help='Bam file to get depth data from' | |
) | |
# Adds required argument so user has to supply reference file | |
parser.add_argument( | |
'reference', | |
help='Reference file to get genome length from' | |
) | |
return parser.parse_args() | |
def main(): | |
args = parse_args() | |
print_dnaplot(args.bam, args.reference) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment