# walterst/filter_fastq.py

## filter_fastq.py
#!/usr/bin/env python

# Filters a fastq so that it matches another fastq containing a subset of the query's reads,
# e.g. matching an index fastq to the PEAR-assembled subset fastq.
# Usage:  python filter_fastq.py input_fastq target_fastq output_fastq

from sys import argv

from cogent.parse.fastq import MinimalFastqParser

# Positions of the fields inside each record yielded by MinimalFastqParser,
# as this script uses them: record[0] is the header, record[1] the sequence,
# record[2] the quality string.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail early with the usage line instead of a bare IndexError on argv.
if len(argv) < 4:
    raise SystemExit(
        "Usage: python filter_fastq.py input_fastq target_fastq output_fastq")

query_path, target_path, output_path = argv[1:4]

# Collect the read IDs present in the target (subset) fastq. A read ID is the
# first whitespace-delimited token of the header, so any trailing description
# is ignored when matching. The set is built directly so membership tests in
# the loop below are constant time.
# NOTE: "U" (universal newlines) is kept from the original script; that mode
# was removed in Python 3.11, so this assumes a Python 2 / PyCogent setup.
with open(target_path, "U") as target_reads:
    target_labels = {
        read_data[header_index].split()[0]
        for read_data in MinimalFastqParser(target_reads, strict=False)
    }

# Stream the query fastq and copy through only the records whose read ID is
# in the target set. Context managers guarantee both files are flushed and
# closed even if parsing fails part-way.
with open(query_path, "U") as query_reads, \
        open(output_path, "w") as output_fastq:
    for read_data in MinimalFastqParser(query_reads, strict=False):
        if read_data[header_index].split()[0] in target_labels:
            # The header is written with a leading "@" and the separator
            # line as a bare "+", producing a four-line fastq record.
            output_fastq.write("@%s\n" % read_data[header_index])
            output_fastq.write("%s\n" % read_data[sequence_index])
            output_fastq.write("+\n")
            output_fastq.write("%s\n" % read_data[quality_index])