# fabiolib/FASTAfilter.py

## FASTAfilter.py
#!/usr/bin/env python
import re, sys
import argparse

# This script was written by David Molnar
# and downloaded from
# http://dm516.user.srcf.net/?p=314
# 2016-04-13

# Command-line interface: three positional arguments (regex, infile, outfile).
parser = argparse.ArgumentParser(description='From a FASTA-file with multiple >entries, filter by sequence ids using a regex.')
parser.add_argument('regex',
                   help="Regex to filter entry ids, e.g. 'chr[1-4]'. Note that the id does not contain the initial > character.")
parser.add_argument('infile',
                   help='A FASTA input file, usually with multiple entries.')
parser.add_argument('outfile',
                   help='The new file with only the matching entries.')

args = parser.parse_args()

# Text mode ("r", not "rb"): the parsing code compares each line against the
# str ">" and concatenates lines into a str, which fails on Python 3 if the
# file yields bytes. On Python 2 both modes return str.
reader = open(args.infile, "r")
outfile = open(args.outfile, "w")
# NOTE: `filter` and `buffer` shadow Python builtins; the names are kept
# because the rest of the script refers to them.
filter = re.compile(args.regex)
title = ""   # id of the entry currently being read, without the leading ">"
buffer = ""  # sequence lines of the current entry, concatenated

def useBuffer(title, buffer):
	"""Write one FASTA entry to the output file if its id matches the filter.

	Uses the module-level compiled regex `filter` and the open file
	`outfile`. The regex is applied with `match`, so it is anchored at the
	start of the id. A line reporting "Used" or "Skipped" is printed to
	stdout for every entry.

	title  -- entry id, without the leading ">".
	buffer -- the entry's sequence as a single string (no line breaks).
	"""
	# print with a single parenthesised argument behaves the same on
	# Python 2 and Python 3.
	if filter.match(title):
		print("Used: " + title)
		outfile.write(">"+title+"\n")
		outfile.write(buffer)
		outfile.write("\n")
	else:
		print("Skipped: "+title)


# Read the input line by line, collecting the sequence lines that follow each
# ">" header, and hand every completed entry to useBuffer().
try:
	for line in reader:
		line = line.strip()

		# Blank lines carry no information.
		if not line:
			continue

		if line.startswith(">"):
			# A header starts a new FASTA entry, so the previous one is
			# complete. Flush on `title`, not on `buffer`: an entry whose
			# sequence is empty must still be passed to useBuffer, exactly
			# as the end-of-file flush below does.
			if title:
				useBuffer(title, buffer)

			buffer = ""
			title = line[1:]
			continue

		# Sequence lines that appear before any header are ignored.
		if title:
			buffer += line

	# End of input: flush the last entry.
	if title:
		useBuffer(title, buffer)
finally:
	# Close both files even if reading or writing failed part-way.
	reader.close()
	outfile.close()
	#!/usr/bin/env python
	import re, sys
	import argparse

	# This script was written by David Molnar
	# and downloaded from
	# http://dm516.user.srcf.net/?p=314
	# 2016-04-13

	# Command-line interface: three positional arguments (regex, infile, outfile).
	parser = argparse.ArgumentParser(description='From a FASTA-file with multiple >entries, filter by sequence ids using a regex.')
	parser.add_argument('regex',
	                   help="Regex to filter entry ids, e.g. 'chr[1-4]'. Note that the id does not contain the initial > character.")
	parser.add_argument('infile',
	                   help='A FASTA input file, usually with multiple entries.')
	parser.add_argument('outfile',
	                   help='The new file with only the matching entries.')

	args = parser.parse_args()

	# Text mode ("r", not "rb"): the parsing code compares each line against the
	# str ">" and concatenates lines into a str, which fails on Python 3 if the
	# file yields bytes. On Python 2 both modes return str.
	reader = open(args.infile, "r")
	outfile = open(args.outfile, "w")
	# NOTE: `filter` and `buffer` shadow Python builtins; the names are kept
	# because the rest of the script refers to them.
	filter = re.compile(args.regex)
	title = ""   # id of the entry currently being read, without the leading ">"
	buffer = ""  # sequence lines of the current entry, concatenated

	def useBuffer(title, buffer):
		"""Write one FASTA entry to the output file if its id matches the filter.

		Uses the compiled regex `filter` and the open file `outfile` from the
		enclosing scope. The regex is applied with `match`, so it is anchored
		at the start of the id. A line reporting "Used" or "Skipped" is
		printed to stdout for every entry.

		title  -- entry id, without the leading ">".
		buffer -- the entry's sequence as a single string (no line breaks).
		"""
		# The nesting below was restored; the flattened original had no
		# indented function body. print with a single parenthesised argument
		# behaves the same on Python 2 and Python 3.
		if filter.match(title):
			print("Used: " + title)
			outfile.write(">"+title+"\n")
			outfile.write(buffer)
			outfile.write("\n")
		else:
			print("Skipped: "+title)


	# Read the input line by line, collecting the sequence lines that follow each
	# ">" header, and hand every completed entry to useBuffer().
	# The nesting below was restored; the flattened original had none.
	try:
		for line in reader:
			line = line.strip()

			# Blank lines carry no information.
			if not line:
				continue

			if line.startswith(">"):
				# A header starts a new FASTA entry, so the previous one is
				# complete. Flush on `title`, not on `buffer`: an entry whose
				# sequence is empty must still be passed to useBuffer, exactly
				# as the end-of-file flush below does.
				if title:
					useBuffer(title, buffer)

				buffer = ""
				title = line[1:]
				continue

			# Sequence lines that appear before any header are ignored.
			if title:
				buffer += line

		# End of input: flush the last entry.
		if title:
			useBuffer(title, buffer)
	finally:
		# Close both files even if reading or writing failed part-way.
		reader.close()
		outfile.close()