Created
April 27, 2014 13:25
-
-
Save lambdalisue/11345518 to your computer and use it in GitHub Desktop.
A library for handling Amino Acid sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
"""
Amino acid sequence manipulation library
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'

# One-letter amino acid codes; the order matches AMINOACIDS3 index by index
AMINOACIDS = [
    'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I',
    'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
]
# Three-letter amino acid codes (upper case), same order as AMINOACIDS
AMINOACIDS3 = tuple(
    "ALA ARG ASN ASP CYS GLU GLN GLY HIS ILE "
    "LEU LYS MET PHE PRO SER THR TRP TYR VAL".split()
)
def ichunk(s, chunk_size):
    """
    Create an iterator which yields consecutive chunks of a string

    The last chunk is shorter than ``chunk_size`` when ``len(s)`` is not a
    multiple of ``chunk_size``. An empty string yields nothing.

    Args:
        s (str): A base string
        chunk_size (int): A chunk string length (must be 1 or greater)

    Yields:
        str : Chunked string

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Because this is a
            generator, the error is raised when iteration starts, not when
            the function is called.
    """
    # A negative step would silently produce no chunks and a zero step is
    # rejected by range() anyway, so fail loudly for both.
    if chunk_size < 1:
        raise ValueError("chunk_size must be 1 or greater")
    # ``range`` is available on both Python 2 and Python 3 (``xrange`` is not)
    for index in range(0, len(s), chunk_size):
        yield s[index:index + chunk_size]
def chunk(s, chunk_size):
    """
    Split a string into pieces of the specified size and return them as a list

    This is the eager counterpart of ``ichunk``: it collects everything that
    ``ichunk(s, chunk_size)`` yields.

    Args:
        s (str): A base string
        chunk_size (int): A chunk string length

    Return:
        list : A list of chunked strings

    Example:
        >>> sequence = 'ABCDEFGHIJKLMN'
        >>> chunk(sequence, 2)
        ['AB', 'CD', 'EF', 'GH', 'IJ', 'KL', 'MN']
        >>> chunk(sequence, 3)
        ['ABC', 'DEF', 'GHI', 'JKL', 'MN']
    """
    pieces = [piece for piece in ichunk(s, chunk_size)]
    return pieces
def itranslate1(sequence):
    """
    Walk through a three-letter-code amino acid sequence and yield the
    single-letter code of each residue

    The sequence is read three characters at a time and each triplet is
    upper-cased before lookup, so the input may be in any letter case.

    Args:
        sequence (str): A triple letters amino acid sequence

    Yield:
        str : A corresponding single letter of amino acid

    Raises:
        KeyError: If a triplet is not one of the codes in AMINOACIDS3
    """
    # The lookup table is built once, on first iteration, and then kept as
    # an attribute of this function so later calls reuse it.
    attr = '_translation_table'
    try:
        lookup = getattr(itranslate1, attr)
    except AttributeError:
        lookup = dict(zip(AMINOACIDS3, AMINOACIDS))
        setattr(itranslate1, attr, lookup)
    for triplet in ichunk(sequence, 3):
        yield lookup[triplet.upper()]
def translate1(sequence):
    """
    Convert a three-letter-code amino acid sequence into the equivalent
    single-letter-code sequence

    Args:
        sequence (str): A triple letters amino acid sequence

    Return:
        str : A corresponding single letter of amino acid sequence

    Example:
        >>> sequence = ("AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMet"
        ...             "PheProSerThrTrpTyrVal")
        >>> translate1(sequence)
        'ARNDCEQGHILKMFPSTWYV'
    """
    letters = itranslate1(sequence)
    return "".join(letters)
def itranslate3(sequence):
    """
    Iterate over a single letter in the amino acid sequence and yield the
    corresponding amino acid triple letters

    Each letter is upper-cased before lookup, so the input may be in any
    letter case (the same way itranslate1 treats its input). The yielded
    code is capitalized, e.g. ``'Ala'``.

    Args:
        sequence (str): A single letter amino acid sequence

    Yield:
        str : A corresponding triple letters of amino acid

    Raises:
        KeyError: If a letter is not one of the codes in AMINOACIDS
    """
    # get translation table from the cache (or create); the table is stored
    # as an attribute of this function so it is built only once
    cache = '_translation_table'
    if not hasattr(itranslate3, cache):
        setattr(itranslate3, cache, dict(zip(AMINOACIDS, AMINOACIDS3)))
    table = getattr(itranslate3, cache)
    # iterate translation
    for aa in sequence:
        # the table keys are upper case; normalize so lowercase input works
        yield table[aa.upper()].capitalize()
def translate3(sequence):
    """
    Convert a single-letter-code amino acid sequence into the equivalent
    three-letter-code sequence

    Args:
        sequence (str): A single letter amino acid sequence

    Return:
        str : A corresponding triple letters of amino acid sequence

    Example:
        >>> sequence = 'ARNDCEQGHILKMFPSTWYV'
        >>> translate3(sequence)
        'AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMetPheProSerThrTrpTyrVal'
    """
    triplets = itranslate3(sequence)
    return "".join(triplets)
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    import doctest
    doctest.testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment