Skip to content

Instantly share code, notes, and snippets.

@JohnLonginotto
Last active September 10, 2016 12:58
Show Gist options
  • Save JohnLonginotto/7909b2c01da0273bb1f78e706070b481 to your computer and use it in GitHub Desktop.
Save JohnLonginotto/7909b2c01da0273bb1f78e706070b481 to your computer and use it in GitHub Desktop.
import sys
# Only header lines starting with this prefix are collected from the reads file.
_READ_ID_PREFIX = "@HWI"
# The filtered index records are always written to this file in the working directory.
_OUTPUT_PATH = "filtered_index.fastq"
# How many collected IDs are echoed as a sanity check (the original loop
# stopped after index 10, i.e. after 11 entries).
_PREVIEW_COUNT = 11


def _normalise_id(header_line):
    """Return *header_line* without its trailing line terminator.

    Comparing terminator-free headers lets an ID match even when the two
    files use different line endings or the header is the unterminated last
    line of a file.
    """
    return header_line.rstrip("\r\n")


def _collect_read_ids(reads_path):
    """Return the set of read IDs found in the file at *reads_path*.

    The file is walked in groups of four lines. Only the first line of each
    group is inspected; it is kept when it starts with ``_READ_ID_PREFIX``.
    """
    read_ids = set()
    with open(reads_path, "r") as read_file:
        for header in read_file:
            if header.startswith(_READ_ID_PREFIX):
                read_ids.add(_normalise_id(header))
            # Step over the other three lines of the record without storing
            # them; the default stops next() raising on a truncated file.
            for _ in range(3):
                next(read_file, None)
    return read_ids


def _print_preview(read_ids):
    """Print up to ``_PREVIEW_COUNT`` collected IDs, or a notice if there are none."""
    if not read_ids:
        print("List is Empty")
        return
    for position, read_id in enumerate(read_ids):
        if position >= _PREVIEW_COUNT:
            break
        print(read_id)


def _filter_index(index_path, read_ids, output_path):
    """Copy the four-line records of *index_path* whose header is in *read_ids*.

    Every record is examined (none are skipped) and matching records are
    written unchanged, all four lines, to *output_path*. An incomplete
    record at the end of the index file is ignored.

    Returns the number of records written.
    """
    written = 0
    with open(index_path, "r") as index_file, open(output_path, "w") as filtered_file:
        for header in index_file:
            sequence = next(index_file, None)
            separator = next(index_file, None)
            quality = next(index_file, None)
            if quality is None:
                # Fewer than four lines were left: truncated trailing record.
                break
            if _normalise_id(header) in read_ids:
                filtered_file.writelines((header, sequence, separator, quality))
                written += 1
    return written


def main(argv=None):
    """Filter an index file down to the reads present in a reads file.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the reads file and
    ``argv[2]`` the index file. Matching index records are written to
    ``filtered_index.fastq`` in the current working directory.

    Raises ``SystemExit`` with a usage message when either path is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        program = argv[0] if argv else "script"
        sys.exit("usage: %s READS_FILE INDEX_FILE" % program)

    reads, index = argv[1], argv[2]
    all_readIDs = _collect_read_ids(reads)
    _print_preview(all_readIDs)
    _filter_index(index, all_readIDs, _OUTPUT_PATH)
    print("All done!")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment