Skip to content

Instantly share code, notes, and snippets.

@walterst
Last active November 2, 2017 11:16
Show Gist options
  • Save walterst/927070e5313a730b34af238413d7462c to your computer and use it in GitHub Desktop.
Save walterst/927070e5313a730b34af238413d7462c to your computer and use it in GitHub Desktop.
Filters an input fastq to match labels in target fastq file.
#!/usr/bin/env python
# Filters a query fastq so that it keeps only the reads whose labels also appear in a target fastq
# (the target being a subset of the query), e.g. matching an index fastq to the PEAR-assembled subset fastq
# Usage: python filter_fastq.py input_fastq target_fastq output_fastq
from sys import argv
from cogent.parse.fastq import MinimalFastqParser
# Positions of the fields this script reads from each record yielded by
# MinimalFastqParser.
header_index = 0
sequence_index = 1
quality_index = 2

# Pass 1: collect the label (first whitespace-delimited token of the header)
# of every read in the target fastq. A set gives constant-time membership
# tests in the second pass. The file is closed as soon as the labels are read.
# "U" (universal newlines) is kept from the original script.
with open(argv[2], "U") as target_reads:
    target_labels = {
        read_data[header_index].split()[0]
        for read_data in MinimalFastqParser(target_reads, strict=False)
    }

# Pass 2: stream the query fastq and write out, in query order, every read
# whose label is present in the target. The context manager guarantees the
# output file is flushed and closed even if parsing fails part-way through.
with open(argv[1], "U") as query_reads, open(argv[3], "w") as output_fastq:
    for read_data in MinimalFastqParser(query_reads, strict=False):
        if read_data[header_index].split()[0] in target_labels:
            # Emit a four-line fastq record; the full header is preserved,
            # and the "+" separator line is written without a repeated label.
            output_fastq.write(
                "@%s\n%s\n+\n%s\n"
                % (
                    read_data[header_index],
                    read_data[sequence_index],
                    read_data[quality_index],
                )
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment