lambdalisue/aminoacids.py

## aminoacids.py
# coding=utf-8
"""
Amino acid sequence manipulation library
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'
# One-letter amino acid codes; AMINOACIDS[i] corresponds to AMINOACIDS3[i]
AMINOACIDS = [
    'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I',
    'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
]
# Upper-case three-letter amino acid codes, in the same order as AMINOACIDS
AMINOACIDS3 = (
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
    'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
    'LEU', 'LYS', 'MET', 'PHE', 'PRO',
    'SER', 'THR', 'TRP', 'TYR', 'VAL',
)

def ichunk(s, chunk_size):
    """
    Create an iterator which yields consecutive chunks of a string

    The last chunk is shorter than ``chunk_size`` when the length of ``s``
    is not a multiple of ``chunk_size``.

    Args:
        s (str): A base string
        chunk_size (int): A chunk string length

    Yields:
        str : A chunked string

    Raises:
        ValueError: On the first iteration when ``chunk_size`` is 0, because
            ``range`` rejects a zero step
    """
    # ``range`` is available on both Python 2 and Python 3, whereas
    # ``xrange`` raises NameError on Python 3.
    for index in range(0, len(s), chunk_size):
        yield s[index:index + chunk_size]

def chunk(s, chunk_size):
    """
    Split a string into pieces of the specified size and return them as a list

    This is the eager counterpart of ``ichunk``: every piece produced by
    ``ichunk(s, chunk_size)`` is collected into a list.

    Args:
        s (str): A base string
        chunk_size (int): A chunk string length

    Return:
        list : A list of chunked strings

    Example:
        >>> sequence = 'ABCDEFGHIJKLMN'
        >>> chunk(sequence, 2)
        ['AB', 'CD', 'EF', 'GH', 'IJ', 'KL', 'MN']
        >>> chunk(sequence, 3)
        ['ABC', 'DEF', 'GHI', 'JKL', 'MN']
    """
    pieces = ichunk(s, chunk_size)
    return [piece for piece in pieces]

def itranslate1(sequence):
    """
    Walk through a three-letter amino acid sequence three characters at a
    time and yield the one-letter code of each residue

    The lookup is case-insensitive because every triplet is upper-cased
    before it is looked up.

    Args:
        sequence (str): A triple letters amino acid sequence

    Yield:
        str : A corresponding single letter of amino acid

    Raises:
        KeyError: When a triplet (including a shorter trailing remainder)
            is not one of the codes in ``AMINOACIDS3``
    """
    # The three-letter -> one-letter table is built on first use and then
    # kept as an attribute of this function so later calls reuse it.
    attr = '_translation_table'
    try:
        lookup = getattr(itranslate1, attr)
    except AttributeError:
        lookup = dict(zip(AMINOACIDS3, AMINOACIDS))
        setattr(itranslate1, attr, lookup)
    for triplet in ichunk(sequence, 3):
        yield lookup[triplet.upper()]

def translate1(sequence):
    """
    Convert a three-letter amino acid sequence into the equivalent
    one-letter amino acid sequence

    The one-letter codes yielded by ``itranslate1`` are concatenated into a
    single string.

    Args:
        sequence (str): A triple letters amino acid sequence

    Return:
        str : A corresponding single letter of amino acid sequence

    Example:
        >>> sequence = ("AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMet"
        ...             "PheProSerThrTrpTyrVal")
        >>> translate1(sequence)
        'ARNDCEQGHILKMFPSTWYV'
    """
    single_letters = itranslate1(sequence)
    return "".join(single_letters)

def itranslate3(sequence):
    """
    Walk through a one-letter amino acid sequence and yield the
    three-letter code of each residue

    Each yielded code is capitalized (for example ``'Ala'``).

    Args:
        sequence (str): A single letter amino acid sequence

    Yield:
        str : A corresponding triple letters of amino acid

    Raises:
        KeyError: When a character is not one of the codes in
            ``AMINOACIDS`` (the lookup is case-sensitive)
    """
    # The one-letter -> three-letter table is built on first use and then
    # kept as an attribute of this function so later calls reuse it.
    attr = '_translation_table'
    try:
        lookup = getattr(itranslate3, attr)
    except AttributeError:
        lookup = dict(zip(AMINOACIDS, AMINOACIDS3))
        setattr(itranslate3, attr, lookup)
    for letter in sequence:
        yield lookup[letter].capitalize()

def translate3(sequence):
    """
    Convert a one-letter amino acid sequence into the equivalent
    three-letter amino acid sequence

    The three-letter codes yielded by ``itranslate3`` are concatenated into
    a single string.

    Args:
        sequence (str): A single letter amino acid sequence

    Return:
        str : A corresponding triple letters of amino acid sequence

    Example:
        >>> sequence = 'ARNDCEQGHILKMFPSTWYV'
        >>> translate3(sequence)
        'AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMetPheProSerThrTrpTyrVal'
    """
    triple_letters = itranslate3(sequence)
    return "".join(triple_letters)


if __name__ == '__main__':
    # When executed as a script, run the examples embedded in the
    # docstrings of this module as doctests.
    import doctest
    doctest.testmod()
	# coding=utf-8
	"""
	Amino acid sequence manipulation library
	"""
	__author__ = 'Alisue <lambdalisue@hashnote.net>'
	# One-letter amino acid codes; AMINOACIDS[i] corresponds to AMINOACIDS3[i]
	AMINOACIDS = [
		'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I',
		'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
	]
	# Upper-case three-letter amino acid codes, in the same order as AMINOACIDS
	AMINOACIDS3 = (
		'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
		'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
		'LEU', 'LYS', 'MET', 'PHE', 'PRO',
		'SER', 'THR', 'TRP', 'TYR', 'VAL',
	)

	def ichunk(s, chunk_size):
		"""
		Create an iterator which yields consecutive chunks of a string

		The last chunk is shorter than ``chunk_size`` when the length of ``s``
		is not a multiple of ``chunk_size``.

		Args:
			s (str): A base string
			chunk_size (int): A chunk string length

		Yields:
			str : A chunked string

		Raises:
			ValueError: On the first iteration when ``chunk_size`` is 0,
				because ``range`` rejects a zero step
		"""
		# ``range`` is available on both Python 2 and Python 3, whereas
		# ``xrange`` raises NameError on Python 3.
		for index in range(0, len(s), chunk_size):
			yield s[index:index + chunk_size]

	def chunk(s, chunk_size):
		"""
		Chunk a string with the specified size and return the pieces as a list

		Every piece produced by ``ichunk(s, chunk_size)`` is collected into
		a list.

		Args:
			s (str): A base string
			chunk_size (int): A chunk string length

		Return:
			list : A list of chunked strings

		Example:
			>>> sequence = 'ABCDEFGHIJKLMN'
			>>> chunk(sequence, 2)
			['AB', 'CD', 'EF', 'GH', 'IJ', 'KL', 'MN']
			>>> chunk(sequence, 3)
			['ABC', 'DEF', 'GHI', 'JKL', 'MN']
		"""
		return list(ichunk(s, chunk_size))

	def itranslate1(sequence):
		"""
		Iterate over triple letters in the amino acid sequence and yield the
		corresponding amino acid single letter

		The lookup is case-insensitive because every triplet is upper-cased
		before it is looked up.

		Args:
			sequence (str): A triple letters amino acid sequence

		Yield:
			str : A corresponding single letter of amino acid

		Raises:
			KeyError: When a triplet (including a shorter trailing remainder)
				is not one of the codes in ``AMINOACIDS3``
		"""
		# get translation table from the cache (or create); the table is
		# stored as an attribute of this function so it is built only once
		cache = '_translation_table'
		if not hasattr(itranslate1, cache):
			setattr(itranslate1, cache, dict(zip(AMINOACIDS3, AMINOACIDS)))
		table = getattr(itranslate1, cache)
		# iterate translation
		for aa in ichunk(sequence, 3):
			yield table[aa.upper()]

	def translate1(sequence):
		"""
		Translate triple letters in the amino acid sequence to the
		corresponding amino acid single letter sequence

		The one-letter codes yielded by ``itranslate1`` are concatenated into
		a single string.

		Args:
			sequence (str): A triple letters amino acid sequence

		Return:
			str : A corresponding single letter of amino acid sequence

		Example:
			>>> sequence = ("AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMet"
			...             "PheProSerThrTrpTyrVal")
			>>> translate1(sequence)
			'ARNDCEQGHILKMFPSTWYV'
		"""
		return "".join(itranslate1(sequence))

	def itranslate3(sequence):
		"""
		Iterate over a single letter in the amino acid sequence and yield the
		corresponding amino acid triple letters

		Each yielded code is capitalized (for example ``'Ala'``).

		Args:
			sequence (str): A single letter amino acid sequence

		Yield:
			str : A corresponding triple letters of amino acid

		Raises:
			KeyError: When a character is not one of the codes in
				``AMINOACIDS`` (the lookup is case-sensitive)
		"""
		# get translation table from the cache (or create); the table is
		# stored as an attribute of this function so it is built only once
		cache = '_translation_table'
		if not hasattr(itranslate3, cache):
			setattr(itranslate3, cache, dict(zip(AMINOACIDS, AMINOACIDS3)))
		table = getattr(itranslate3, cache)
		# iterate translation
		for aa in sequence:
			yield table[aa].capitalize()

	def translate3(sequence):
		"""
		Translate a single letter in the amino acid sequence to the
		corresponding amino acid triple letter sequence

		The three-letter codes yielded by ``itranslate3`` are concatenated
		into a single string.

		Args:
			sequence (str): A single letter amino acid sequence

		Return:
			str : A corresponding triple letters of amino acid sequence

		Example:
			>>> sequence = 'ARNDCEQGHILKMFPSTWYV'
			>>> translate3(sequence)
			'AlaArgAsnAspCysGluGlnGlyHisIleLeuLysMetPheProSerThrTrpTyrVal'
		"""
		return "".join(itranslate3(sequence))


	if __name__ == '__main__':
		# When executed as a script, run the examples embedded in the
		# docstrings of this module as doctests.
		import doctest
		doctest.testmod()