Skip to content

Instantly share code, notes, and snippets.

@bluegenes
Last active December 20, 2018 19:57
Show Gist options
  • Save bluegenes/c70daf6fbfea8e2f569f0c6f8261048b to your computer and use it in GitHub Desktop.
Save bluegenes/c70daf6fbfea8e2f569f0c6f8261048b to your computer and use it in GitHub Desktop.
#Author: Tessa Pierce
import argparse
import screed
import re
def extract_contigs(in_fasta, pattern, outF):
    """Write the records of a fasta file whose names match a pattern.

    Each record read from ``in_fasta`` (via ``screed.open``) whose ``name``
    matches ``pattern`` is written to ``outF`` as a ``>name`` header line
    followed by the sequence on a single line.

    Args:
        in_fasta: path of the input fasta file.
        pattern: regular expression searched for (case-insensitively)
            anywhere in each record name.
        outF: path of the output fasta file. When falsy, the output is
            written next to the input as ``<stem>_<pattern>.fa``, where
            ``<stem>`` is the input file name up to its first ``.fa``.

    Raises:
        TypeError: if ``pattern`` is not a string (e.g. ``None``).
        re.error: if ``pattern`` is not a valid regular expression.
    """
    # Local import keeps this block self-contained; only needed for the
    # default output path.
    import os

    # Compile once, before any file is opened: an invalid or missing pattern
    # then fails up front instead of after the output file has already been
    # created/truncated, and the regex is not re-resolved for every record.
    name_regex = re.compile(pattern, re.IGNORECASE)

    if not outF:
        # Strip the extension from the file name only. Splitting the whole
        # path on '.fa' would cut at a '.fa' inside a directory name
        # (e.g. 'user.family/contigs.fasta') and write to the wrong place.
        directory, filename = os.path.split(in_fasta)
        stem = filename.split('.fa')[0]
        outF = os.path.join(directory, stem + '_' + pattern + '.fa')

    with screed.open(in_fasta) as seqF, open(outF, 'w') as out:
        for record in seqF:
            if name_regex.search(record.name):
                out.write('>' + record.name + '\n' + record.sequence + '\n')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and extract the fasta
    # records whose names match --pattern.
    parser = argparse.ArgumentParser(
        description="Extract fasta records whose names match a pattern")
    parser.add_argument('fasta', help='input fasta file')
    # The pattern is mandatory: without it extract_contigs cannot build the
    # default output name or match anything, so let argparse report the
    # omission as a usage error instead of failing later with a TypeError.
    parser.add_argument('--pattern', required=True, help='pattern to match')
    parser.add_argument('--out', help='output fasta file', default=None)
    args = parser.parse_args()
    extract_contigs(args.fasta, args.pattern, args.out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment