Skip to content

Instantly share code, notes, and snippets.

@daniel-dona
Last active July 26, 2018 12:40
Show Gist options
  • Save daniel-dona/40991c5530ac04e9347d004239572537 to your computer and use it in GitHub Desktop.
Save daniel-dona/40991c5530ac04e9347d004239572537 to your computer and use it in GitHub Desktop.
Lectura de un fichero en formato FASTA, conversión de bases a codones y de codones a aminoácidos
#!/usr/bin/env python3
'''
Descargado de https://www.ncbi.nlm.nih.gov/nuccore/NM_001143854.1?report=fasta
'''
# Read the whole sample FASTA file into memory. The context manager
# guarantees the handle is closed even if reading fails.
with open('sample_rna.txt', 'r') as file:
    stringfile = file.read()
'''
A Alanina
B Ácido aspártico o Asparagina
C Cisteína
D Ácido aspártico
E Ácido glutámico
F Fenilalanina
G Glicina
H Histidina
I Isoleucina
K Lisina
L Leucina
M Metionina
N Asparagina
O Pirrolisina
P Prolina
Q Glutamina
R Arginina
S Serina
T Treonina
U Selenocisteína
V Valina
W Triptófano
Y Tirosina
'''
def procesar_fasta(fichero):
    """Parse FASTA-formatted text into a list of records.

    The text is split on ">" and anything before the first ">" is
    discarded. Within each record the first line is the description and
    the remaining lines are concatenated into a single sequence in which
    every "T" is replaced by "U".

    Lines are separated with ``str.splitlines`` and sequence lines are
    stripped, so CRLF line endings and stray surrounding whitespace do
    not end up inside the description or the sequence.

    Args:
        fichero: Full contents of a FASTA file as a string.

    Returns:
        A list of dicts, one per record in input order, each with the
        keys 'descripcion' and 'bases'.
    """
    resultado = []
    for registro in fichero.split(">")[1:]:
        lineas = registro.splitlines()
        # A bare ">" produces an empty record; keep it with empty fields.
        descripcion = lineas[0] if lineas else ""
        bases = "".join(linea.strip() for linea in lineas[1:])
        resultado.append({
            'descripcion': descripcion,
            'bases': bases.replace("T", "U"),
        })
    return resultado
def secuencia_a_codones(secuencia):
    """Split a sequence into consecutive, non-overlapping codons.

    Reading starts at index 0. Every complete group of three characters
    is returned, including the last one; a trailing remainder of one or
    two characters is dropped.

    Args:
        secuencia: The sequence string to split.

    Returns:
        A list of 3-character strings, in order of appearance.
    """
    # Index just past the last complete triplet.
    fin = len(secuencia) - len(secuencia) % 3
    return [secuencia[i:i + 3] for i in range(0, fin, 3)]
# RNA codon -> one-letter amino acid code ("STOP" for the three stop codons).
# Built once at import time instead of on every lookup.
_RNA_CODON = {
    "UUU": "F", "CUU": "L", "AUU": "I", "GUU": "V",
    "UUC": "F", "CUC": "L", "AUC": "I", "GUC": "V",
    "UUA": "L", "CUA": "L", "AUA": "I", "GUA": "V",
    "UUG": "L", "CUG": "L", "AUG": "M", "GUG": "V",
    "UCU": "S", "CCU": "P", "ACU": "T", "GCU": "A",
    "UCC": "S", "CCC": "P", "ACC": "T", "GCC": "A",
    "UCA": "S", "CCA": "P", "ACA": "T", "GCA": "A",
    "UCG": "S", "CCG": "P", "ACG": "T", "GCG": "A",
    "UAU": "Y", "CAU": "H", "AAU": "N", "GAU": "D",
    "UAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D",
    "UAA": "STOP", "CAA": "Q", "AAA": "K", "GAA": "E",
    "UAG": "STOP", "CAG": "Q", "AAG": "K", "GAG": "E",
    "UGU": "C", "CGU": "R", "AGU": "S", "GGU": "G",
    "UGC": "C", "CGC": "R", "AGC": "S", "GGC": "G",
    "UGA": "STOP", "CGA": "R", "AGA": "R", "GGA": "G",
    "UGG": "W", "CGG": "R", "AGG": "R", "GGG": "G",
}


def codon_a_aminoacido(codon):
    """Translate one RNA codon into its amino acid code.

    Args:
        codon: A three-letter, upper-case RNA codon (alphabet A, C, G, U).

    Returns:
        The one-letter amino acid code, or the string "STOP" for the
        codons UAA, UAG and UGA.

    Raises:
        KeyError: If the codon is not one of the 64 entries in the table.
    """
    return _RNA_CODON[codon]
def secuencia_a_aminoacidos(secuencia):
    """Translate a sequence into a list of amino acid codes.

    The sequence is first split into codons with ``secuencia_a_codones``;
    each codon is then looked up with ``codon_a_aminoacido``.

    Args:
        secuencia: The sequence string to translate.

    Returns:
        A list with one translated entry per codon, in codon order.
    """
    tripletes = secuencia_a_codones(secuencia)
    return [codon_a_aminoacido(triplete) for triplete in tripletes]
# Step 1: parse the FASTA text to extract the sequences it contains and
# keep only the first one (index 0).
secuencia1 = procesar_fasta(stringfile)[0]

# Step 2: translate that sequence's bases into amino acids and print them.
aminoacidos = secuencia_a_aminoacidos(secuencia1['bases'])
print(aminoacidos)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment