Skip to content

Instantly share code, notes, and snippets.

@lambdalisue
Created April 29, 2014 11:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lambdalisue/11396996 to your computer and use it in GitHub Desktop.
Save lambdalisue/11396996 to your computer and use it in GitHub Desktop.
A library to load fasta format text
# coding=utf-8
"""
A library to load FASTA format text.

Provides ``load_fasta``, which parses FASTA text into a dict mapping each
record name to its sequence string.  Running this module as a script
executes the doctests embedded in its docstrings.
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'
def load_fasta(iterator):
    """
    Load FASTA formatted text and return its sequences keyed by name.

    A record starts at a line whose first non-blank character is ``>``; the
    rest of that line (stripped) is the record name.  Every following
    non-blank line, up to the next header line, is stripped and concatenated
    to form the record's sequence.

    Args:
        iterator: Either a string holding the whole FASTA text, or an
            iterable of strings (e.g. a list of lines or an open text file).
            An iterable is joined with newlines before parsing.

    Returns:
        A dict mapping each record name to its sequence string.  When a name
        occurs more than once, the last record wins.  Lines that appear
        before the first header line are ignored.  Empty input yields an
        empty dict.

    >>> fasta = '''
    ... >avGFP
    ... M-SKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTF
    ... SYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDG
    ... NILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQ
    ... SALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK
    ...
    ... >ECFP
    ... MVSKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTL
    ... TWGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDG
    ... NILGHKLEYNYISHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQ
    ... SALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK'''
    >>> sequences = load_fasta(fasta)
    >>> sorted(sequences.keys())
    ['ECFP', 'avGFP']
    >>> sequences['ECFP']
    'MVSKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTLTWGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYISHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK'
    >>> sequences['avGFP']
    'M-SKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK'
    """
    # ``basestring`` only exists on Python 2.  ``(str, type(u""))`` is
    # (str, unicode) on Python 2 and (str, str) on Python 3, so the same
    # check works on both without raising NameError.
    if isinstance(iterator, (str, type(u""))):
        text = iterator
    else:
        text = "\n".join(iterator)

    sequences = {}
    name = None   # name of the record being read; None until a header is seen
    chunks = []   # sequence lines collected for the current record
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # Only a ">" at the start of a line opens a record, so a ">"
            # inside a header description (e.g. "A->B") stays part of the name.
            if name is not None:
                sequences[name] = "".join(chunks)
            name = line[1:].strip()
            chunks = []
        elif name is not None:
            chunks.append(line)
        # else: content before the first header belongs to no record; skip it.
    if name is not None:
        sequences[name] = "".join(chunks)
    return sequences
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    import doctest
    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment