Skip to content

Instantly share code, notes, and snippets.

@fabiolib
Created April 13, 2016 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fabiolib/d5fa3fb81f8dd114dc01b585606e7a9b to your computer and use it in GitHub Desktop.
Save fabiolib/d5fa3fb81f8dd114dc01b585606e7a9b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import re, sys
import argparse
# This script was written by David Molnar
# and downloaded from
# http://dm516.user.srcf.net/?p=314
# 2016-04-13
# Command-line interface: a regex plus the input and output FASTA paths.
parser = argparse.ArgumentParser(description='From a FASTA-file with multiple >entries, filter by sequence ids using a regex.')
parser.add_argument('regex',
                    help="Regex to filter entry ids, e.g. 'chr[1-4]'. Note that the id does not contain the initial > character.")
parser.add_argument('infile',
                    help='A FASTA input file, usually with multiple entries.')
parser.add_argument('outfile',
                    help='The new file with only the matching entries.')
args = parser.parse_args()

# Compile the pattern BEFORE opening any file: opening the output with "w"
# truncates it, so a typo in the regex must be reported first.
# NOTE: the name shadows the builtin `filter`; it is kept because useBuffer()
# reads this module-level name.
try:
    filter = re.compile(args.regex)
except re.error as err:
    # parser.error() prints the usage line plus this message and exits (status 2).
    parser.error("invalid regex %r: %s" % (args.regex, err))

# Text mode so that lines are `str` on Python 3 as well; the parsing code
# compares them against ">" and concatenates them with str values.
reader = open(args.infile, "r")
outfile = open(args.outfile, "w")

# State of the record currently being read: its header (without the leading
# ">") and the sequence collected so far.
title = ""
buffer = ""
def useBuffer(title, buffer):
    """Write one FASTA record to the output if its title matches the filter.

    The module-level compiled regex ``filter`` is applied with ``match()``,
    i.e. anchored at the start of ``title``. A matching record is written to
    the module-level ``outfile`` as a ``>title`` line followed by the
    sequence on a single line. Either way a "Used"/"Skipped" progress
    message is printed to stdout.

    Args:
        title: Header line of the record without the leading ">".
        buffer: The record's sequence, already joined into one string.
    """
    if filter.match(title):
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("Used: " + title)
        outfile.write(">" + title + "\n")
        outfile.write(buffer)
        outfile.write("\n")
    else:
        print("Skipped: " + title)
# Stream the input once, collecting the sequence lines of the current record
# and handing each completed record to useBuffer().
chunks = []  # sequence lines of the record being read; joined once per record
try:
    for line in reader:
        line = line.strip()
        if not line:
            # Blank lines carry no data.
            continue
        if line.startswith(">"):
            # A header starts a new record, so flush the previous one first.
            # The check is on `title` (not on the collected sequence) so that
            # a record with an empty sequence is treated the same mid-file as
            # it is at end of file.
            if title:
                useBuffer(title, "".join(chunks))
            title = line[1:]
            chunks = []
            continue
        if title:
            # Sequence data before the first header (or after an empty ">"
            # header) belongs to no record and is ignored.
            chunks.append(line)
    # End of input: flush the last record, if any.
    if title:
        useBuffer(title, "".join(chunks))
finally:
    # Close both files even if reading or writing failed part-way.
    reader.close()
    outfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment