walterst/filter_barcode_header.py

## filter_barcode_header.py
#!/usr/bin/env python


# Usage:  python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
# WARNING: the second file specified will be overwritten if it exists!

# Marker in each fastq header after which the barcode text begins.
bc_start_indicator = "1:N:0:"
# Characters removed from the barcode portion of each header.
chars_to_strip = ["+"]

from sys import argv

from cogent.parse.fastq import MinimalFastqParser
from qiime.util import gzip_open

# Positions of the fields within each record yielded by MinimalFastqParser.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail early with a usage message rather than a bare IndexError on argv.
if len(argv) < 3:
    raise SystemExit("Usage: python filter_barcode_header.py "
                     "original_barcode_seqs.fastq new_barcode_seqs.fastq")

# Input paths ending in .gz are opened through QIIME's gzip helper; anything
# else is opened as text in universal-newline mode.
if argv[1].endswith('.gz'):
    query_reads = gzip_open(argv[1])
else:
    query_reads = open(argv[1], "U")

try:
    # WARNING: an existing file at argv[2] is truncated here.
    output_fastq = open(argv[2], "w")
    try:
        for read_data in MinimalFastqParser(query_reads, strict=False):
            header = read_data[header_index]
            # Split at the first indicator only, so header text following a
            # repeated indicator is kept instead of being silently dropped.
            prefix, found, curr_bc = header.partition(bc_start_indicator)
            if not found:
                raise ValueError(
                    "Barcode indicator %r not found in header: %s"
                    % (bc_start_indicator, header))
            for char_to_strip in chars_to_strip:
                curr_bc = curr_bc.replace(char_to_strip, "")
            final_header = prefix + bc_start_indicator + curr_bc
            # Emit a four-line fastq record with a bare "+" separator line.
            output_fastq.write("@%s\n%s\n+\n%s\n" % (
                final_header,
                read_data[sequence_index],
                read_data[quality_index]))
    finally:
        # Close (and flush) the output even if a record fails mid-run.
        output_fastq.close()
finally:
    query_reads.close()
	#!/usr/bin/env python


# Usage: python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
# WARNING: the second file specified will be overwritten if it exists!
# NOTE(review): this looks like a second copy of the script that appears
# earlier in this file - confirm whether both copies are meant to be kept.

# Marker in each fastq header after which the barcode text begins.
bc_start_indicator = "1:N:0:"
# Characters removed from the barcode portion of each header.
chars_to_strip = ["+"]

from sys import argv

from cogent.parse.fastq import MinimalFastqParser
from qiime.util import gzip_open

# Positions of the fields within each record yielded by MinimalFastqParser.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail early with a usage message rather than a bare IndexError on argv.
if len(argv) < 3:
    raise SystemExit("Usage: python filter_barcode_header.py "
                     "original_barcode_seqs.fastq new_barcode_seqs.fastq")

# Input paths ending in .gz are opened through QIIME's gzip helper; anything
# else is opened as text in universal-newline mode.
if argv[1].endswith('.gz'):
    query_reads = gzip_open(argv[1])
else:
    query_reads = open(argv[1], "U")

try:
    # WARNING: an existing file at argv[2] is truncated here.
    output_fastq = open(argv[2], "w")
    try:
        for read_data in MinimalFastqParser(query_reads, strict=False):
            header = read_data[header_index]
            # Split at the first indicator only, so header text following a
            # repeated indicator is kept instead of being silently dropped.
            prefix, found, curr_bc = header.partition(bc_start_indicator)
            if not found:
                raise ValueError(
                    "Barcode indicator %r not found in header: %s"
                    % (bc_start_indicator, header))
            for char_to_strip in chars_to_strip:
                curr_bc = curr_bc.replace(char_to_strip, "")
            final_header = prefix + bc_start_indicator + curr_bc
            # Emit a four-line fastq record with a bare "+" separator line.
            output_fastq.write("@%s\n%s\n+\n%s\n" % (
                final_header,
                read_data[sequence_index],
                read_data[quality_index]))
    finally:
        # Close (and flush) the output even if a record fails mid-run.
        output_fastq.close()
finally:
    query_reads.close()