Skip to content

Instantly share code, notes, and snippets.

@lambdalisue
Created April 29, 2014 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lambdalisue/11401548 to your computer and use it in GitHub Desktop.
Save lambdalisue/11401548 to your computer and use it in GitHub Desktop.
A library to find a corresponding residues from a fasta alignment
# coding=utf-8
"""
"""
__author__ = 'Alisue <lambdalisue@hashnote.net>'
import re
def find_alignment_offsets(sequence):
"""
Get alignment offset from alignment sequence
Args:
sequence (str): A string sequence which might contain '-' to indicate
the gap
Returns:
An alignment offsets list
Examples:
>>> sequence = (
... "MVSKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTL"
... "TWGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDG"
... "NILGHKLEYNYISHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQ"
... "SALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK")
>>> find_alignment_offsets(sequence)
[(7, 4), (55, 1)]
"""
# create offset list from the sequence
offsets = []
for m in re.finditer("-+", sequence):
offsets.append((m.start(), len(m.group())))
return offsets
def find_corresponding_residue_id(residue_id, sequence, reverse=False):
"""
Get corresponding residue id
Examples:
>>> sequence = (
... "MVSKGEE----LFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT-GKLPVPWPTLVTTL"
... "TWGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDG"
... "NILGHKLEYNYISHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQ"
... "SALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK")
>>> find_corresponding_residue_id(0, sequence)
0
>>> find_corresponding_residue_id(1, sequence)
1
>>> find_corresponding_residue_id(7, sequence)
11
>>> find_corresponding_residue_id(8, sequence)
12
>>> find_corresponding_residue_id(12, sequence, reverse=True)
8
>>> find_corresponding_residue_id(11, sequence, reverse=True)
7
>>> find_corresponding_residue_id(10, sequence, reverse=True)
10
"""
offsets = find_alignment_offsets(sequence)
addition = 0
for index, offset in offsets:
if not reverse:
if residue_id >= index:
addition += offset
else:
if residue_id - offset >= index:
addition -= offset
return residue_id + addition
if __name__ == '__main__':
import doctest; doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment