Last active
January 11, 2016 20:47
-
-
Save mikerobeson/e5c0f0678a4785f8cf05 to your computer and use it in GitHub Desktop.
Removes index reads that no longer have a corresponding read in the R1 / R2 or merged fastq file. This helps with input into QIIME's split_libraries_fastq.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# There are times when the R1/R2 reads or merged reads fastq files become out of sync | |
# with its respective index reads file (quality filtering, failed merges, etc...). This | |
# will cause the `split_libraries_fastq.py` script in QIIME to fail. | |
# This script will iterate through the index reads file and remove any indexes that do not | |
# have any corresponding read in the main reads file; making appropriate input for | |
# `split_libraries_fastq.py`. | |
# I pulled this chunk of code out the `join_paired_ends.py` script I wrote for QIIME v1.8 | |
# and later. Specifically, the code activated by the `-b` flag of `join_paired_ends.py`. | |
# This will be useful in cases where alterate quality filtering or paired-end merging | |
# software is utilized; and one needs to rsync the index reads file. | |
from skbio.parse.sequences import parse_fastq | |
from skbio.format.sequences import format_fastq_record | |
from qiime.util import qiime_open | |
from optparse import OptionParser | |
import os | |
import gzip | |
def write_synced_barcodes_fastq(joined_fh, index_fh, output_fh): | |
"""Writes new index file based on surviving assembled paired-ends. | |
-joined_fh : file handle to paired-end assembled fastq file | |
-index_fh : file handle to index / barcode reads fastq file | |
-output_fh : file handle to write updated barcodes file | |
This function iterates through the joined reads file and index file. | |
Only those index-reads within the file at index_fp, that have headers | |
matching those within the joined-pairs at joined_fp, are written | |
to file. | |
WARNING: Assumes reads are in the same order in both files, | |
except for cases in which the corresponding | |
read in the joined file is missing (i.e. pairs | |
failed to assemble). | |
""" | |
# Set up iterators | |
index_fastq_iter = parse_fastq(index_fh, strict=False, enforce_qual_range=False) | |
joined_fastq_iter = parse_fastq(joined_fh, strict=False, enforce_qual_range=False) | |
# Write barcodes / index reads that we observed within | |
# the joined paired-ends. Warn if index and joined data | |
# are not in order. | |
for joined_label, joined_seq, joined_qual in joined_fastq_iter: | |
index_label, index_seq, index_qual = index_fastq_iter.next() | |
while joined_label != index_label: | |
try: | |
index_label, index_seq, index_qual = index_fastq_iter.next() | |
except StopIteration: | |
raise StopIteration("\n\nReached end of index-reads file" + | |
" before iterating through joined paired-end-reads file!" + | |
" Except for missing paired-end reads that did not survive" + | |
" assembly, your index and paired-end reads files must be in" + | |
" the same order! Also, check that the index-reads and" + | |
" paired-end reads have identical headers. The last joined" + | |
" paired-end ID processed was:\n\'%s\'\n" % (joined_label)) | |
else: | |
fastq_string = format_fastq_record(index_label, index_seq, index_qual) | |
output_fh.write(fastq_string) | |
def main(): | |
usage = "\n %prog -i reads.fastq -b barcodes.fastq -o updated.barcodes.fastq" | |
parser = OptionParser(usage=usage) | |
parser.add_option("-i", "--reads_fp", action="store", type="string", help="[REQUIRED]") | |
parser.add_option("-b", "--barcodes_fp", action="store", type="string", help="[REQUIRED]") | |
parser.add_option("-o", "--output_fp", action="store", type="string", help="[REQUIRED]") | |
(options, args) = parser.parse_args() | |
# check for required flags: | |
# first dynamically build up required flags | |
required = [] | |
for item in parser.option_list: | |
if "[REQUIRED]" in item.help: | |
required.append(item.dest) | |
# print out missing options | |
missing = [] | |
for ro in required: | |
if getattr(options, ro) == None: | |
missing.append(ro) | |
missing_str = ', '.join(['--'+mi for mi in missing]) | |
if missing != []: | |
parser.error('Required option(s) missing: \'%s\'.' % missing_str) | |
# assign user input options | |
reads = qiime_open(options.reads_fp) | |
barcodes = qiime_open(options.barcodes_fp) | |
output = open(options.output_fp, 'w') | |
# remove barcodes that are no longer present in the reads file | |
write_synced_barcodes_fastq(reads, barcodes, output) | |
# close files | |
reads.close() | |
barcodes.close() | |
output.close() | |
if __name__ == "__main__": | |
main() |
I responded to your post here. It does not look like that you actually ran the code posted here, but an older bit of quickly hacked-together code of the same name. Keep in mind, that this code is quite different from the one you referred to in the QIIME forum. In fact this script does not output the errors you referred to. :-)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm having issues with this file after updating qiime to 1.9.1. Do you know if there was a change in the qiime scripts or something along those lines that could have caused the sudden errors that I am getting now? remove_unused_barcodes.py was working fine until I updated to 1.9.1.
Errors:
(hundreds of these, it doesn't seem to be finding anything anymore)