Skip to content

Instantly share code, notes, and snippets.

@friveroll
Last active October 9, 2017 00:24
Show Gist options
  • Save friveroll/7a62ec667945a7aec739be62ece6541a to your computer and use it in GitHub Desktop.
Save friveroll/7a62ec667945a7aec739be62ece6541a to your computer and use it in GitHub Desktop.
Problema 1 del folleto "A First Look at the Code of Life" de Cleopatra Kozlowski http://emblog.embl.de/ells/teachingbase/code-of-life-2/. Se trata de encontrar secuencias de péptidos en una secuencia de ADN dada; para ello, este pequeño programa genera una expresión regular basada en la secuencia de aminoácidos para encontrar la posición en l…
# coding: utf-8
import re
# Regex fragment matching every DNA codon of each one-letter amino-acid code
# (standard genetic code; '*' is the stop signal).  '.' marks a fully
# degenerate third base.  Amino acids whose codons span two families (L, R, S)
# and the stop codons use an explicit non-capturing alternation: a single
# character-class pattern for them would also accept codons that belong to
# other amino acids (e.g. 'T[AG][AG]' accepts TGG, which encodes Trp).
_CODONES_DEGENERADOS = {
    'A': 'GC.',
    'C': 'TG[CT]',
    'D': 'GA[CT]',
    'E': 'GA[AG]',
    'F': 'TT[CT]',
    'G': 'GG.',
    'H': 'CA[CT]',
    'I': 'AT[CAT]',
    'K': 'AA[AG]',
    'L': '(?:CT.|TT[AG])',
    'M': 'ATG',
    'N': 'AA[CT]',
    'P': 'CC.',
    'Q': 'CA[AG]',
    'R': '(?:CG.|AG[AG])',
    'S': '(?:TC.|AG[CT])',
    'T': 'AC.',
    'V': 'GT.',
    'W': 'TGG',
    'Y': 'TA[CT]',
    '*': '(?:TA[AG]|TGA)',
}


def secuencia_degenerada(sequencia):
    """Build a regular expression matching any DNA that encodes a peptide.

    Args:
        sequencia: Peptide as a string of upper-case one-letter amino-acid
            codes; '*' denotes a stop codon.

    Returns:
        A regex pattern string made of one codon fragment per residue, in
        order.  An empty peptide yields an empty pattern.

    Raises:
        KeyError: If ``sequencia`` contains a symbol that is not in the
            codon table.
    """
    return ''.join(_CODONES_DEGENERADOS[aa] for aa in sequencia)
def encuentra_motivo(regexp, DNA_sequence):
    """Return the first stretch of ``DNA_sequence`` that matches ``regexp``.

    Args:
        regexp: Regular-expression pattern to search for.
        DNA_sequence: String that is scanned from left to right.

    Returns:
        The text of the leftmost match.

    Raises:
        ValueError: If the pattern does not occur anywhere in the sequence.
    """
    coincidencia = re.search(regexp, DNA_sequence)
    if coincidencia is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            "pattern {!r} not found in DNA sequence".format(regexp)
        )
    return coincidencia.group()
def main():
    """Search three example peptides in a fixed DNA sequence and print hits.

    For every peptide this prints the peptide itself, then a line with the
    1-based start position, the matched DNA stretch and the 1-based position
    of its last base, followed by a blank line.
    """
    DNA_sequence = "TGGTCCTGCAGTCCTCTCCTGGCGCCCCGGGGGCGAGCGGATGTCGATTCTCGTGGAAAGATAGTCCCGCTGCCTGCGGGCGGAGGGACCGTGCTGACCAAGATGTACCCGCGCGGCAACCACTGGGCGGTGGGGCACTTAATGGGGAAAAAGAGCACAGGGGAGTCTTCTTCTGTATGTTCTGAGAGAGGGAGCCTGAAGCAGCAGCTGAGAGATGTGTCGAACGTGAAGTACATCAGGTGGGAAGAAGCTGCAAGGAATTTGCTGGGTCTCATAGAAGCTAAAGGAGAACAGAAACCACCAGCCACCTCAACCCAAGATGTCGATTCTACTTTATTAAGCCCTGGGCAATCAGCAGCCTTCGTGGGATATGTCAGAGGATAGCAGCCAACTGAATAGC"
    peptidos = [
        "MSILVER*",  # Met Ser Ile Leu Val Glu Arg Stop
        "MCRT*",     # Met Cys Arg Thr Stop
        "MSILLY*",   # Met Ser Ile Leu Leu Tyr Stop
    ]
    for peptido in peptidos:
        patron = secuencia_degenerada(peptido)
        match = encuentra_motivo(patron, DNA_sequence)
        # Offset of the matched text inside the sequence (0-based).
        inicio = DNA_sequence.find(match)
        fin = inicio + len(match)
        print("{}\n{} {} {}\n".format(peptido, inicio + 1, match, fin))
# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
@friveroll
Copy link
Author

friveroll commented Oct 8, 2017

Output

MSILVER*
41 ATGTCGATTCTCGTGGAAAGATAG 64

MCRT*
215 ATGTGTCGAACGTGA 229

MSILLY*
320 ATGTCGATTCTACTTTATTAA 340

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment