Created
April 13, 2016 15:08
-
-
Save fabiolib/d5fa3fb81f8dd114dc01b585606e7a9b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Filter the entries of a multi-entry FASTA file by matching their ids against a regex."""
import re
import sys
import argparse

# This script was written by David Molnar
# and downloaded from
# http://dm516.user.srcf.net/?p=314
# 2016-04-13

parser = argparse.ArgumentParser(
    description='From a FASTA-file with multiple >entries, filter by sequence ids using a regex.')
parser.add_argument(
    'regex',
    help="Regex to filter entry ids, e.g. 'chr[1-4]'. Note that the id does not contain the initial > character.")
parser.add_argument(
    'infile',
    help='A FASTA input file, usually with multiple entries.')
parser.add_argument(
    'outfile',
    help='The new file with only the matching entries.')
args = parser.parse_args()

# Open the input in text mode: the rest of the script compares lines against
# the str ">" and writes them to a text-mode output, which fails on Python 3
# if the reader yields bytes.
reader = open(args.infile, "r")
outfile = open(args.outfile, "w")

# NOTE: `filter` and `buffer` shadow builtins; the names are kept because the
# rest of the script refers to them.
filter = re.compile(args.regex)

# Parser state: id of the entry currently being read and its sequence so far.
title = ""
buffer = ""
def useBuffer(title, buffer, pattern=None, out=None):
    """Write one FASTA entry to the output if its id matches the filter.

    Args:
        title: Entry id, i.e. the header line without the leading ">".
        buffer: The entry's sequence as a single string.
        pattern: Compiled regex used to test the id. Defaults to the
            module-level ``filter``.
        out: Writable text file object. Defaults to the module-level
            ``outfile``.

    Returns:
        True if the entry was written, False if it was skipped.
    """
    if pattern is None:
        pattern = filter
    if out is None:
        out = outfile

    # match() anchors at the start of the id, so 'chr1' also selects 'chr10'.
    if pattern.match(title):
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Used: " + title)
        out.write(">" + title + "\n")
        out.write(buffer)
        out.write("\n")
        return True

    print("Skipped: " + title)
    return False
# Stream the input once, collecting the sequence lines of the current entry
# and handing each completed entry to useBuffer().
chunks = []
try:
    for line in reader:
        # A reader opened in binary mode yields bytes on Python 3; decode so
        # the str comparisons below work. On Python 2 bytes is str, so this
        # branch is never taken.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode("utf-8")

        line = line.strip()
        if not line:
            continue

        if line.startswith(">"):
            # A header starts a new entry, so flush the previous one first.
            # The check is on the title (not the sequence) so that an entry
            # with no sequence lines is treated the same mid-file as at EOF.
            if title:
                buffer = "".join(chunks)
                useBuffer(title, buffer)
            title = line[1:]
            chunks = []
        elif title:
            # Sequence lines seen before any header are ignored.
            chunks.append(line)

    # End of input: flush the last entry, if there was one.
    if title:
        buffer = "".join(chunks)
        useBuffer(title, buffer)
finally:
    # Close both files even if reading or writing failed part-way.
    reader.close()
    outfile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment