"""Filter per-read k-mer classification output by fraction of assigned k-mers.

Reads classification lines from stdin and echoes to stdout only the reads in
which enough k-mers were assigned to a taxon.

Input is assumed to be Kraken / Kraken 2 style per-read output (TODO confirm
against the producing tool): the first four columns are metadata and everything
from the fifth column onward is a list of ``taxid:count`` tokens, where taxid
``0`` means "unassigned".

Usage (file names are illustrative):
    python this_script.py < classifier_output.txt > filtered.txt
"""
import sys

THRESHOLD = 0.25  # reads must have 25% of their k-mers assigned

# Kraken 2 puts this token between the two mates' k-mer lists for paired reads.
# It is not a taxid:count pair and carries no k-mers.
MATE_SEPARATOR = "|:|"


def _kmer_tokens(line):
    """Return the ``taxid:count`` tokens found after the fourth column."""
    stripped = line.strip()
    if "\t" in stripped:
        # Tab-delimited input: split on tabs first, so that spaces inside an
        # earlier column (e.g. a taxon name) cannot shift the column index.
        columns = stripped.split("\t")
        return " ".join(columns[4:]).split()
    # No tabs at all: fall back to plain whitespace-delimited columns.
    return stripped.split()[4:]


def keep_read(line, threshold=THRESHOLD):
    """Decide whether one classification line passes the filter.

    Args:
        line: One line of classifier output.
        threshold: Minimum fraction of k-mers that must not be unassigned
            (taxid ``0``) for the read to be kept.

    Returns:
        True if the read should be printed. False if the line has no k-mers
        at all (blank line, or fewer than five columns) or if the unassigned
        fraction is strictly greater than ``1 - threshold``.

    Raises:
        IndexError, ValueError: If a token is not of the form
            ``taxid:<integer>`` (other than the mate separator).
    """
    total_kmers = 0
    unassigned_kmers = 0
    for token in _kmer_tokens(line):
        if token == MATE_SEPARATOR:
            continue
        parts = token.split(":")
        count = int(parts[1])
        total_kmers += count
        # Compare the whole taxid, not just its first character.
        if parts[0] == "0":
            unassigned_kmers += count

    if total_kmers == 0:
        # Drop unassigned results
        return False
    # float() keeps this a true division on Python 2 as well.
    if unassigned_kmers / float(total_kmers) > (1.0 - threshold):
        # Drop reads with many unassigned k-mers
        return False
    return True


def main():
    """Copy every line from stdin that passes ``keep_read`` to stdout."""
    for line in sys.stdin:
        if keep_read(line):
            print(line.strip())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Hey hey,
your script seems really useful.
My only concern is what to use as the input file... Do you have any further instructions for this script?
All the best
Luigi