Skip to content

Instantly share code, notes, and snippets.

@lambdalisue
Created April 27, 2014 13:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lambdalisue/11345518 to your computer and use it in GitHub Desktop.
Save lambdalisue/11345518 to your computer and use it in GitHub Desktop.
A library for handling amino acid sequences
# coding=utf-8
"""
Amino acid sequence manipulation library
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'
# One-letter amino acid codes; the entry at position i pairs with
# AMINOACIDS3[i] when the translation tables are built with zip().
AMINOACIDS = [
    'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I',
    'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
]
# Upper-case three-letter amino acid codes, in the same order as AMINOACIDS.
AMINOACIDS3 = (
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
    'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
    'LEU', 'LYS', 'MET', 'PHE', 'PRO',
    'SER', 'THR', 'TRP', 'TYR', 'VAL',
)
def ichunk(s, chunk_size):
    """
    Create an iterator which yields consecutive chunks of a string

    The final chunk is shorter than ``chunk_size`` when ``len(s)`` is not a
    multiple of it. An empty string yields nothing.

    Args:
        s (str): A base string
        chunk_size (int): A chunk string length

    Yields:
        str : The next chunk of ``s``

    Raises:
        ValueError: If ``chunk_size`` is zero (raised by ``range`` when the
            iterator is first advanced)
    """
    # ``range`` rather than ``xrange``: ``xrange`` does not exist on Python 3,
    # while ``range`` is available on both major versions.
    for index in range(0, len(s), chunk_size):
        yield s[index:index + chunk_size]
def chunk(s, chunk_size):
    """
    Split a string into pieces of the given size and collect them in a list

    Args:
        s (str): A base string
        chunk_size (int): The length of each piece

    Return:
        list : The pieces of ``s`` in their original order

    Example:
        >>> sequence = 'ABCDEFGHIJKLMN'
        >>> chunk(sequence, 2)
        ['AB', 'CD', 'EF', 'GH', 'IJ', 'KL', 'MN']
        >>> chunk(sequence, 3)
        ['ABC', 'DEF', 'GHI', 'JKL', 'MN']
    """
    pieces = ichunk(s, chunk_size)
    return [piece for piece in pieces]
def itranslate1(sequence):
    """
    Walk a three-letter amino acid sequence and yield, for every triple,
    the matching one-letter code

    Each triple is upper-cased before lookup, so the input may be written in
    any letter case.

    Args:
        sequence (str): A triple letters amino acid sequence

    Yield:
        str : A corresponding single letter of amino acid

    Raises:
        KeyError: If a triple is not found in the translation table
    """
    # The three-letter -> one-letter table is built on first use and then
    # kept as an attribute on this function for later calls.
    try:
        lookup = itranslate1._translation_table
    except AttributeError:
        lookup = dict(zip(AMINOACIDS3, AMINOACIDS))
        itranslate1._translation_table = lookup
    # Translate one triple at a time
    for triple in ichunk(sequence, 3):
        yield lookup[triple.upper()]
def translate1(sequence):
    """
    Convert a three-letter amino acid sequence into the equivalent
    one-letter sequence

    Args:
        sequence (str): A triple letters amino acid sequence

    Return:
        str : A corresponding single letter of amino acid sequence

    Example:
        >>> sequence = ("AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMet"
        ...             "PheProSerThrTrpTyrVal")
        >>> translate1(sequence)
        'ARNDCEQGHILKMFPSTWYV'
    """
    letters = itranslate1(sequence)
    return "".join(letters)
def itranslate3(sequence):
    """
    Iterate over a single letter in the amino acid sequence and yield the
    corresponding amino acid triple letters

    Each letter is upper-cased before lookup, so lowercase one-letter codes
    are accepted in the same way ``itranslate1`` accepts any letter case.

    Args:
        sequence (str): A single letter amino acid sequence

    Yield:
        str : A corresponding triple letters of amino acid, capitalized
            (e.g. ``'Ala'``)

    Raises:
        KeyError: If a letter is not found in the translation table
    """
    # get translation table from the cache (or create); the table is stored
    # as an attribute on this function so it is built only once
    cache = '_translation_table'
    if not hasattr(itranslate3, cache):
        setattr(itranslate3, cache, dict(zip(AMINOACIDS, AMINOACIDS3)))
    table = getattr(itranslate3, cache)
    # iterate translation
    for aa in sequence:
        # The table keys are upper-case; normalise so 'a' and 'A' both work
        yield table[aa.upper()].capitalize()
def translate3(sequence):
    """
    Convert a one-letter amino acid sequence into the equivalent
    three-letter sequence

    Args:
        sequence (str): A single letter amino acid sequence

    Return:
        str : A corresponding triple letters of amino acid sequence

    Example:
        >>> sequence = 'ARNDCEQGHILKMFPSTWYV'
        >>> translate3(sequence)
        'AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMetPheProSerThrTrpTyrVal'
    """
    triples = itranslate3(sequence)
    return "".join(triples)
if __name__ == '__main__':
    # Run the examples embedded in the docstrings as tests
    import doctest

    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment