Skip to content

Instantly share code, notes, and snippets.

@kgori
Last active October 27, 2016 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kgori/41a68016b27afc021975 to your computer and use it in GitHub Desktop.
Save kgori/41a68016b27afc021975 to your computer and use it in GitHub Desktop.
Fasta parser - python oneliners
import re
# Fasta one-liners
# Parse fasta from a string
def fasta_parse_string(s):
    """Parse FASTA-formatted text into a dict.

    A record starts at a '>' that is the first character of a line. The rest
    of that line (stripped of surrounding whitespace) is the description and
    becomes the key; all following lines up to the next record are joined,
    with every whitespace character removed, to form the sequence.

    Only a '>' at the start of a line opens a new record, so descriptions may
    themselves contain '>' (e.g. "A>G variant"). Any text before the first
    record is ignored. If a description occurs more than once, the last
    record wins.

    Args:
        s: The FASTA text.

    Returns:
        dict mapping description -> sequence string.
    """
    records = {}
    # With MULTILINE, '^' matches at the start of every line; element 0 of the
    # split is whatever precedes the first header and is discarded.
    for chunk in re.split(r'^>', s, flags=re.MULTILINE)[1:]:
        header, _, body = chunk.partition('\n')
        records[header.strip()] = re.sub(r'\s+', '', body)
    return records
# Parse fasta from a file
def fasta_parse_file(f):
    """Parse the FASTA file at path *f* into a dict.

    The file is opened in text mode, read into memory in one go and handed
    to fasta_parse_string, whose result is returned unchanged.
    """
    with open(f) as handle:
        contents = handle.read()
        return fasta_parse_string(contents)
# Write data (d) back into fasta format, breaking into lines (length=n)
def fasta_format(d, n):
    """Render a mapping of sequences as FASTA text.

    Each item of *d* becomes a '>description' header line followed by the
    sequence wrapped into lines of at most *n* characters. Records are
    joined with a single newline and no trailing newline is appended. An
    empty sequence yields just the header followed by a newline.

    Args:
        d: Mapping of description -> sequence string.
        n: Maximum number of sequence characters per line; must be >= 1.

    Returns:
        The FASTA-formatted string ('' for an empty mapping).

    Raises:
        ValueError: If n is smaller than 1. (A negative width would otherwise
            produce an empty range and silently drop the sequence data.)
    """
    if n < 1:
        raise ValueError(
            'line length n must be a positive integer, got {!r}'.format(n))
    records = []
    for name, seq in d.items():
        wrapped = [seq[start:start + n] for start in range(0, len(seq), n)]
        records.append('>{}\n'.format(name) + '\n'.join(wrapped))
    return '\n'.join(records)
# Memory efficient fasta
class Fasta(object):
    """Memory-efficient iterator over the records of a FASTA file.

    The file is read line by line; each iteration step yields one
    ``(description, sequence)`` tuple, where the description is the header
    line without its '>' marker and surrounding whitespace, and the sequence
    is the concatenation of the following lines with all whitespace removed.

    Lines before the first header are skipped. A file without any header
    yields no records. The underlying file is closed automatically when the
    end of the file is reached; call close() (or use the instance as a
    context manager) to release it earlier.

    Works as an iterator on both Python 3 (``__next__``) and Python 2
    (``next``).
    """

    def __init__(self, filename):
        """Open *filename* and advance to the first header line."""
        self.file = open(filename, 'r')
        self.buffer = ''
        self.exhausted = False
        try:
            # Skip everything that precedes the first record.
            while not self.buffer.startswith('>'):
                self.consume()
        except StopIteration:
            # No header at all (e.g. empty file): nothing to iterate over.
            self.exhausted = True

    def consume(self):
        """Read the next line into self.buffer.

        Raises:
            StopIteration: At end of file; the file is closed first.
        """
        self.buffer = self.file.readline()
        if self.buffer == '':
            self.file.close()
            raise StopIteration

    def __next__(self):
        """Return the next ``(description, sequence)`` tuple.

        Raises:
            StopIteration: When all records have been returned.
        """
        if self.exhausted:
            raise StopIteration
        # On entry self.buffer always holds the header line of this record.
        desc = self.buffer.strip('>').strip()
        seq = []
        try:
            self.consume()
            while not self.buffer.startswith('>'):
                seq.append(self.buffer)
                self.consume()
        except StopIteration:
            # End of file: still return the record collected so far (possibly
            # with an empty sequence) and stop on the following call.
            self.exhausted = True
        return desc, re.sub(r'\s+', '', ''.join(seq))

    # Python 2 spelling of the iterator protocol.
    next = __next__

    def __iter__(self):
        return self

    def close(self):
        """Close the underlying file and end the iteration."""
        self.exhausted = True
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False
def parse_fasta(filename):
    """Read the FASTA file *filename* into a dict via the streaming Fasta reader.

    Returns a dict mapping each record's description to its sequence; when a
    description occurs more than once, the last record wins.
    """
    # Fasta yields (description, sequence) pairs, which dict() consumes directly.
    return dict(Fasta(filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment