Skip to content

Instantly share code, notes, and snippets.

@kdaily
Last active December 17, 2015 20:49
Show Gist options
  • Save kdaily/5670701 to your computer and use it in GitHub Desktop.
Save kdaily/5670701 to your computer and use it in GitHub Desktop.
Add the read group ID to each read name in a BAM or SAM file.
"""Add the read group ID to each read name in a BAM or SAM file.
Must output to SAM format to get the header.
To directly convert to BAM, pipe to samtools view:
python add_rgid_to_qname.py INPUT.sam /dev/stdout | samtools view -Sb /dev/stdin > OUTPUT.bam
"""
import pysam
def main(argv=None):
    """Prefix every read name with its read group ID and write SAM output.

    Each read's name (qname) is rewritten as ``<RG>_<qname>``, where ``<RG>``
    is the value of the read's ``RG`` tag. The output header is copied from
    the input file.

    Args:
        argv: Optional list of command-line arguments (input path, output
            path). Defaults to ``None``, in which case ``sys.argv[1:]`` is
            parsed by argparse.

    Raises:
        IOError: If the input path does not end in ``.sam`` or ``.bam``, or
            if the output path is neither a ``.sam`` file nor standard output.
        KeyError: If a read carries no ``RG`` tag.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Add read group ID to the read name (qname).")
    parser.add_argument('input_file', help="SAM/BAM input file.")
    parser.add_argument('output_file', help="SAM output file.")
    opts = parser.parse_args(argv)

    # Validate both paths before opening anything so that a bad argument
    # never leaves a file handle open.
    if opts.input_file.endswith(".sam"):
        input_mode = "r"
    elif opts.input_file.endswith(".bam"):
        input_mode = "rb"
    else:
        raise IOError("Input not a sam or bam file.")

    # Standard output is accepted as well as *.sam: the documented usage
    # pipes SAM text through /dev/stdout into samtools.
    writes_to_stdout = opts.output_file in ("-", "/dev/stdout")
    if not (opts.output_file.endswith(".sam") or writes_to_stdout):
        # An explicit raise (rather than assert) survives ``python -O``.
        raise IOError("Can only output to SAM format.")

    input_file = pysam.Samfile(opts.input_file, input_mode)
    try:
        output_file = pysam.Samfile(opts.output_file, "wh", template=input_file)
        try:
            # until_eof=True streams the file in order without needing an
            # index and also yields unmapped reads, so every read is renamed.
            for read in input_file.fetch(until_eof=True):
                read_tags = dict(read.tags)
                if 'RG' not in read_tags:
                    raise KeyError("Read %s has no RG tag." % read.qname)
                read_RGID = str(read_tags['RG'])
                read.qname = "%s_%s" % (read_RGID, read.qname)
                output_file.write(read)
        finally:
            output_file.close()
    finally:
        input_file.close()
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment