Skip to content

Instantly share code, notes, and snippets.

@jakebiesinger
Created December 29, 2010 20:20
Show Gist options
  • Save jakebiesinger/759018 to your computer and use it in GitHub Desktop.
Save jakebiesinger/759018 to your computer and use it in GitHub Desktop.
Simple parser for fastq file format
from itertools import ifilter, islice
def readFastq(fastqfile):
    """Parse a fastq-formatted file, yielding (header, sequence, quality) tuples.

    Blank lines are skipped; the remaining lines are consumed four at a
    time as: ``@header``, sequence, ``+[header]``, quality.

    Args:
        fastqfile: any iterable of text lines (e.g. an open file object).

    Yields:
        ``(header, sequence, quality)`` where ``header`` has its leading
        ``'@'`` removed and no field carries a line terminator.

    Raises:
        ValueError: a record's first line does not start with ``'@'`` or
            its third line does not start with ``'+'``.
        EOFError: the input ends in the middle of a record (one to three
            non-blank lines left over).
    """
    # Remove line terminators only. Stripping '\r' as well keeps CRLF files
    # from leaking '\r' into fields or making "blank" lines look non-empty.
    stripped = (line.rstrip('\r\n') for line in fastqfile)
    # Skip blank lines so stray empty lines cannot shift the 4-line framing.
    nonblank = (line for line in stripped if line)
    while True:
        fqlines = list(islice(nonblank, 4))
        if not fqlines:
            # Clean end of input. A bare return (not `raise StopIteration`)
            # is required: PEP 479 turns StopIteration raised inside a
            # generator into RuntimeError.
            return
        if len(fqlines) != 4:
            raise EOFError("Failed to parse four lines from fastq file!")
        header1, seq, header2, qual = fqlines
        if not (header1.startswith('@') and header2.startswith('+')):
            raise ValueError("Invalid header lines: %s and %s" % (header1, header2))
        yield header1[1:], seq, qual
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment