Filters an input fastq to match labels in target fastq file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Filter a fastq so that it only keeps the reads that are present in another
# fastq that is a subset of the query one, e.g. matching an index fastq to the
# pear assembled subset fastq. Reads are matched on the first
# whitespace-delimited token of their header.
# Usage: python filter_fastq.py input_fastq target_fastq output_fastq

from sys import argv

from cogent.parse.fastq import MinimalFastqParser

# Positions of the fields inside each record yielded by MinimalFastqParser.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail with a usage message instead of a bare IndexError when arguments are
# missing; extra arguments are tolerated, as before.
if len(argv) < 4:
    raise SystemExit(
        "Usage: python filter_fastq.py input_fastq target_fastq output_fastq")

# Collect the label (first token of the header) of every read in the target
# fastq. A set gives constant-time membership tests in the filtering loop.
# "U" requests universal-newline mode, as in the original script.
with open(argv[2], "U") as target_reads:
    target_labels = set(
        read_data[header_index].split()[0]
        for read_data in MinimalFastqParser(target_reads, strict=False))

# Stream the query fastq and copy through only the reads whose label was seen
# in the target file. The `with` blocks guarantee both handles are closed (and
# the output flushed) even if parsing fails part-way through.
with open(argv[1], "U") as query_reads:
    with open(argv[3], "w") as output_fastq:
        for read_data in MinimalFastqParser(query_reads, strict=False):
            if read_data[header_index].split()[0] in target_labels:
                # The "@" is re-added here; presumably the parser strips it
                # from the header -- confirm against MinimalFastqParser.
                output_fastq.write("@%s\n%s\n+\n%s\n" % (
                    read_data[header_index],
                    read_data[sequence_index],
                    read_data[quality_index]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment