Last active
December 17, 2015 20:49
-
-
Save kdaily/5670701 to your computer and use it in GitHub Desktop.
Add the read group ID to each read name in a BAM or SAM file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Add the read group ID to each read name in a BAM or SAM file.

The output must be written in SAM format so that the header is included.

To convert directly to BAM, pipe the output to samtools view:

    python add_rgid_to_qname.py INPUT.sam /dev/stdout | samtools view -Sb /dev/stdin > OUTPUT.bam
"""
import argparse

import pysam
def main():
    """Prefix every read name with its read group ID and write SAM output.

    Parses two positional command-line arguments: the SAM/BAM input path and
    the SAM output path. For each read, the value of its ``RG`` tag is
    prepended to the query name as ``<RG>_<qname>`` and the read is written
    to the output file, which is opened with the input file as its template.

    Raises:
        IOError: If the input path does not end in ``.sam`` or ``.bam``, or
            if the output path is neither a ``.sam`` path nor a
            standard-output target (``-`` or ``/dev/stdout``).
        KeyError: If a read has no ``RG`` tag.
    """
    parser = argparse.ArgumentParser(
        description="Add read group ID to the read name (qname).")
    parser.add_argument('input_file', help="SAM/BAM input file.")
    parser.add_argument('output_file', help="SAM output file.")
    opts = parser.parse_args()

    # Validate both paths before opening anything, so a bad argument cannot
    # leave an open input handle or a freshly truncated output file behind.
    if opts.input_file.endswith(".sam"):
        input_mode = "r"
    elif opts.input_file.endswith(".bam"):
        input_mode = "rb"
    else:
        raise IOError("Input not a sam or bam file.")

    # The documented usage pipes through /dev/stdout, so standard-output
    # targets are accepted in addition to *.sam paths. Raising explicitly
    # (rather than asserting) keeps the check active under ``python -O``.
    to_stdout = opts.output_file in ("-", "/dev/stdout")
    if not (to_stdout or opts.output_file.endswith(".sam")):
        raise IOError("Can only output to SAM format.")

    input_file = pysam.Samfile(opts.input_file, input_mode)
    try:
        output_file = pysam.Samfile(opts.output_file, "wh",
                                    template=input_file)
        try:
            # until_eof=True streams the records in file order without
            # needing an index, which SAM and unindexed BAM files lack.
            for read in input_file.fetch(until_eof=True):
                read_tags = dict(read.tags)
                if 'RG' not in read_tags:
                    raise KeyError(
                        "Read %s has no RG tag." % read.qname)
                read_RGID = str(read_tags['RG'])
                read.qname = "%s_%s" % (read_RGID, read.qname)
                output_file.write(read)
        finally:
            output_file.close()
    finally:
        # Close the input even if opening the output or processing fails.
        input_file.close()
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment