Skip to content

Instantly share code, notes, and snippets.

@dpryan79
Last active March 10, 2018 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dpryan79/4bdce89e1f7573b5339c to your computer and use it in GitHub Desktop.
Save dpryan79/4bdce89e1f7573b5339c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# This is just a quick hack and hasn't been tested, but it'll likely work
import pysam
import argparse
import gzip
def writeAlignment(of, al):
name = al.query_name
seq = al.query_sequence
qual = al.query_qualities
rc = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"}
# Reverse complement?
if al.is_reverse:
qual = qual[::-1]
seq = seq[::-1]
seq = "".join([rc[x] for x in seq])
qual = "".join([chr(33 + x) for x in qual])
of.write("@{0}\n{1}\n+\n{2}\n".format(name, seq, qual))
parser = argparse.ArgumentParser(description="Take a BAM file and write one or more (gzipped) fastq files.")
parser.add_argument("ifile", metavar="input.bam", help="The input file. It does not need to be sorted.")
parser.add_argument("pref", metavar="output_prefix", help="Output will be written to output_prefix_R1.fastq.gz, output_prefix_R2.fastq.gz, and output_prefix.fastq.gz")
args = parser.parse_args()
bam = pysam.Samfile(args.ifile, "rb")
of1 = gzip.GzipFile("{0}_R1.fastq.gz".format(args.pref), "wb")
of2 = gzip.GzipFile("{0}_R2.fastq.gz".format(args.pref), "wb")
of = gzip.GzipFile("{0}.fastq.gz".format(args.pref), "wb")
missing = dict()
for read in bam.fetch():
# Exclude secondary/supplementary
if read.is_secondary or read.is_supplementary:
continue
if read.is_paired:
# Was the mate already found?
if read.query_name in missing.keys():
if read.is_read1:
writeAlignment(of1, read)
writeAlignment(of2, missing[read.query_name])
else:
writeAlignment(of2, read)
writeAlignment(of1, missing[read.query_name])
del missing[read.query_name]
else:
missing[read.query_name] = read
else:
writeAlignment(of, read)
for read in missing:
writeAlignment(of, read)
# One could track whether paired/single-end reads were found and delete empty files
bam.close()
of1.close()
of2.close()
of.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment