Skip to content

Instantly share code, notes, and snippets.

@dinovski
Last active September 8, 2018 21:38
Show Gist options
  • Save dinovski/baa06d99f292d6c749fd8cb3cde8e3b8 to your computer and use it in GitHub Desktop.
Save dinovski/baa06d99f292d6c749fd8cb3cde8e3b8 to your computer and use it in GitHub Desktop.
quality score distribution from fastq
#!/usr/bin/env python
usage = """
## quality score distribution from fastq
gunzip -c fastq.gz | python qualDist.py
"""
import sys
from collections import Counter


def tally_qualities(lines):
    """Count the quality characters found on every fourth line of *lines*.

    The input is assumed to be four-line FASTQ records (header, sequence,
    separator, quality); records wrapped over more lines are not supported.

    Args:
        lines: iterable of text lines, e.g. ``sys.stdin``.

    Returns:
        A ``Counter`` mapping the ASCII code of each quality character to the
        number of times it was observed.

    NOTE(review): keys are raw ASCII codes, exactly as in the original script.
    If the data is Phred+33 encoded, subtract 33 to get Phred scores --
    confirm the intended encoding before changing the reported numbers.
    """
    counts = Counter()
    for line_no, line in enumerate(lines, 1):
        # The quality string is the 4th line of each record.
        if line_no % 4 == 0:
            # Tally per value instead of storing every score, so memory use
            # stays bounded by the number of distinct quality characters.
            counts.update(ord(ch) for ch in line.strip())
    return counts


def mean_quality(counts):
    """Return the mean ASCII code over all tallied characters.

    Args:
        counts: mapping of ASCII code -> number of occurrences.

    Returns:
        The mean as a float, or ``None`` when nothing was tallied (avoids the
        division by zero the original hit on empty input).
    """
    total = sum(counts.values())
    if not total:
        return None
    # float() keeps true division on Python 2 as well as Python 3.
    return sum(code * n for code, n in counts.items()) / float(total)


def main():
    """Read FASTQ text from stdin and print the count and mean of scores.

    Returns:
        Process exit status: 0 on success, 1 when no quality data was read.
    """
    counts = tally_qualities(sys.stdin)
    print(sum(counts.values()))
    avg_q = mean_quality(counts)
    if avg_q is None:
        sys.stderr.write("no quality lines found on stdin\n")
        return 1
    print("avg qual score: %s" % avg_q)
    # Per-value distribution output is left disabled, as in the original:
    # for k in sorted(counts):
    #     print("score: %s count: %d" % (k, counts[k]))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment