Skip to content

Instantly share code, notes, and snippets.

@walterst
Last active December 19, 2015 22:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walterst/6028738 to your computer and use it in GitHub Desktop.
Save walterst/6028738 to your computer and use it in GitHub Desktop.
Usage: python extract_bcs_from_fastq.py X Y Z A B, where: X is the input fastq file; Y is the output fastq file for the barcode reads; Z is the output fastq file for the reads with the barcode removed; A is the size of the barcode; and B is True or False, selecting whether the barcode is reverse complemented before writing.
#!/usr/bin/env python
from sys import argv
from cogent.parse.fastq import MinimalFastqParser
from cogent import DNA
_USAGE = (
    "Usage: python extract_bcs_from_fastq.py X Y Z A B\n"
    "  X: input fastq file\n"
    "  Y: output barcode reads fastq file\n"
    "  Z: output reads (with barcode removed) fastq file\n"
    "  A: size of barcode\n"
    "  B: True/False for reverse complement of barcode before writing"
)


def _parse_rc_flag(flag):
    """Translate the literal 'True'/'False' command-line flag into a bool."""
    if flag == "True":
        return True
    if flag == "False":
        return False
    raise ValueError("Fifth argument must be 'True' or 'False' to set"
                     " reverse complement of barcodes.")


def main(args):
    """Split the leading barcode off every read of a fastq file.

    For each record of the input file, the first ``barcode_size`` characters
    of the sequence and quality strings are written to the barcode output
    and the remaining characters to the reads output, both under the
    original label.

    Parameters:
        args: argv-style list: [program name, input fastq path, barcode
            output path, reads output path, barcode size, 'True'/'False'
            reverse-complement flag]. Extra trailing items are ignored.

    Raises:
        SystemExit: with the usage text when fewer than five arguments
            follow the program name.
        ValueError: when the barcode size is not a non-negative integer or
            the flag is not exactly 'True' or 'False'.
    """
    if len(args) < 6:
        raise SystemExit(_USAGE)

    # Validate everything before touching the filesystem, so a bad argument
    # cannot truncate existing output files.
    barcode_size = int(args[4])
    if barcode_size < 0:
        raise ValueError("Fourth argument (barcode size) must not be"
                         " negative.")
    rc_bc = _parse_rc_flag(args[5])

    # "U" keeps the original universal-newline read mode.
    # NOTE(review): the cogent imports suggest a Python 2 target; "U" was
    # removed from open() in Python 3.11 -- confirm before porting.
    # The with-block closes (and flushes) all three files even on error.
    with open(args[1], "U") as f, open(args[2], "w") as bc_out, \
            open(args[3], "w") as read_out:
        for data in MinimalFastqParser(f, strict=False):
            curr_label = data[0].strip()
            curr_seq = data[1].strip()
            curr_qual = data[2].strip()

            curr_bc_read = curr_seq[:barcode_size]
            curr_bc_qual = curr_qual[:barcode_size]
            if rc_bc:
                curr_bc_read = DNA.rc(curr_bc_read)
                # Reverse the qualities as well so each score stays aligned
                # with its (now reverse-complemented) base.
                curr_bc_qual = curr_bc_qual[::-1]

            bc_out.write("@%s\n%s\n+\n%s\n"
                         % (curr_label, curr_bc_read, curr_bc_qual))
            read_out.write("@%s\n%s\n+\n%s\n"
                           % (curr_label, curr_seq[barcode_size:],
                              curr_qual[barcode_size:]))


if __name__ == "__main__":
    main(argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment