Skip to content

Instantly share code, notes, and snippets.

@ivan-krukov
Last active December 20, 2015 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivan-krukov/6143278 to your computer and use it in GitHub Desktop.
Save ivan-krukov/6143278 to your computer and use it in GitHub Desktop.
FASTA grep: search a FASTA file for records whose IDs match a pattern, given either as a single query string or as a file containing one regular expression per line.
#get sequences by regex match from a fasta file
from argparse import ArgumentParser
import re
def split_records(fasta_text):
    """Split FASTA text into a list of records.

    A record starts at a line beginning with ">" and runs up to (but not
    including) the next such line or the end of the text.  Each returned
    string starts with its ">" header line and has trailing whitespace
    removed.  Any text before the first ">" line is ignored.
    """
    records = []
    current = None
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            if current is not None:
                records.append("\n".join(current).rstrip())
            current = [line]
        elif current is not None:
            current.append(line)
    # The final record has no following ">" to terminate it, so flush it here.
    if current is not None:
        records.append("\n".join(current).rstrip())
    return records


def grep_records(records, pattern, single_sequence=False):
    """Return the records whose header matches the regex *pattern*.

    The pattern is matched against the header line only, starting at the
    character right after the leading ">" (i.e. ``re.match`` semantics), so
    it can never run into sequence data or a neighbouring record.

    records         -- list of record strings as produced by split_records()
    pattern         -- regular expression source string
    single_sequence -- if true, return at most the first matching record

    Raises re.error if *pattern* is not a valid regular expression.
    """
    regex = re.compile(pattern)
    hits = []
    for record in records:
        header = record.split("\n", 1)[0]
        # header[0] is the ">" marker; the ID starts right after it.
        if regex.match(header[1:]):
            hits.append(record)
            if single_sequence:
                break
    return hits


def main(argv=None):
    """Command-line entry point: print the FASTA records matching the queries.

    argv -- argument list to parse; None means use sys.argv[1:].
    """
    parser = ArgumentParser(description="grep for fasta IDs")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--query_string", "-q")
    group.add_argument("--query_file", "-f")
    parser.add_argument("fasta_file")
    parser.add_argument("--single_sequence", "-s", action="store_true")
    args = parser.parse_args(argv)

    with open(args.fasta_file) as handle:
        records = split_records(handle.read())

    if args.query_string:
        patterns = [args.query_string]
    elif args.query_file:
        with open(args.query_file) as handle:
            # Skip blank lines: an empty pattern would match every record.
            patterns = [line.strip() for line in handle if line.strip()]
    else:
        patterns = []

    for pattern in patterns:
        try:
            hits = grep_records(records, pattern, args.single_sequence)
        except re.error as err:
            parser.error("invalid pattern %r: %s" % (pattern, err))
        for record in hits:
            # Parenthesised single-argument print works on Python 2 and 3.
            print(record)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment