Skip to content

Instantly share code, notes, and snippets.

@gregcaporaso
Created February 16, 2021 17:17
Show Gist options
  • Save gregcaporaso/b7206060986f36737b7826b55096ce8d to your computer and use it in GitHub Desktop.
Save gregcaporaso/b7206060986f36737b7826b55096ce8d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Codon table adapted from\n",
"# https://gitlab.com/RebelCoder/dna-toolset/-/blob/90e8d9249c6dcc82a850aa854da4baabef98ee6f/structures.py\n",
"#\n",
"# Each pair below is (one-letter amino acid code, codons that map to it).\n",
"# The comprehension flattens the pairs into a codon -> amino acid lookup.\n",
"# '*' groups the codons at which translate() stops reading.\n",
"genetic_code = {\n",
"    codon: amino_acid\n",
"    for amino_acid, codons in [\n",
"        ('A', 'GCT GCC GCA GCG'),\n",
"        ('C', 'TGT TGC'),\n",
"        ('D', 'GAT GAC'),\n",
"        ('E', 'GAA GAG'),\n",
"        ('F', 'TTT TTC'),\n",
"        ('G', 'GGT GGC GGA GGG'),\n",
"        ('H', 'CAT CAC'),\n",
"        ('I', 'ATA ATT ATC'),\n",
"        ('K', 'AAA AAG'),\n",
"        ('L', 'TTA TTG CTT CTC CTA CTG'),\n",
"        ('M', 'ATG'),\n",
"        ('N', 'AAT AAC'),\n",
"        ('P', 'CCT CCC CCA CCG'),\n",
"        ('Q', 'CAA CAG'),\n",
"        ('R', 'CGT CGC CGA CGG AGA AGG'),\n",
"        ('S', 'TCT TCC TCA TCG AGT AGC'),\n",
"        ('T', 'ACT ACC ACA ACG'),\n",
"        ('V', 'GTT GTC GTA GTG'),\n",
"        ('W', 'TGG'),\n",
"        ('Y', 'TAT TAC'),\n",
"        ('*', 'TAA TAG TGA'),\n",
"    ]\n",
"    for codon in codons.split()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 0-based index at which the first 'ATG' begins; translate() uses the same call\n",
"# to decide where the first codon starts.\n",
"'TTTTATGATGT'.index('ATG')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[4, 7, 10, 13, 16, 19, 22]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Every third index from 4 up to (but not including) 25 - the same stepping\n",
"# translate() uses to move from one codon start to the next.\n",
"list(range(4, 25, 3))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T\n"
]
}
],
"source": [
"# Slicing past the end of a string does not raise: only the characters that\n",
"# exist are returned. The sequence has 11 characters, so the slice starting at\n",
"# index 10 yields a single character rather than a full three-character codon.\n",
"test_sequence = 'TTTTATGACCT'\n",
"print(test_sequence[10:10+3])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'R'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look up the one-letter amino acid code stored for a single codon.\n",
"genetic_code['AGA']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Substring membership test: evaluates to False here because 'AAA' does occur\n",
"# in the string. translate() uses the same test to detect a missing 'ATG'.\n",
"'AAA' not in 'ACGAAAT'"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def translate(sequence, genetic_code):\n",
"    \"\"\"Translate the reading frame that starts at the first 'ATG'.\n",
"\n",
"    Reading begins at the first occurrence of 'ATG' in sequence and\n",
"    advances three characters (one codon) at a time. It ends at the first\n",
"    codon that genetic_code maps to '*', or when fewer than three\n",
"    characters remain; an incomplete trailing codon is ignored.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    sequence : str\n",
"        Nucleotide sequence to translate.\n",
"    genetic_code : dict\n",
"        Maps each three-character codon to a one-character amino acid\n",
"        code, with '*' marking the codons that end translation.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        Amino acid codes in reading order, excluding the terminating '*'.\n",
"        The list is empty when sequence contains no 'ATG'.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If a complete codon in the reading frame is not a key of\n",
"        genetic_code.\n",
"    \"\"\"\n",
"    protein = []\n",
"\n",
"    if 'ATG' not in sequence:\n",
"        return protein\n",
"\n",
"    first_codon_position = sequence.index('ATG')\n",
"\n",
"    # Stopping two characters short of the end guarantees that every slice\n",
"    # taken below is a complete three-character codon.\n",
"    last_codon_limit = len(sequence) - 2\n",
"\n",
"    for codon_start in range(first_codon_position, last_codon_limit, 3):\n",
"        codon = sequence[codon_start:codon_start + 3]\n",
"        try:\n",
"            amino_acid = genetic_code[codon]\n",
"        except KeyError:\n",
"            # 'from None' drops the redundant inner KeyError, so the traceback\n",
"            # shows only the descriptive message instead of a chained\n",
"            # 'During handling of the above exception...' report.\n",
"            raise KeyError(codon + \" is not present in the genetic code.\") from None\n",
"        if amino_acid == '*':\n",
"            break\n",
"        protein.append(amino_acid)\n",
"\n",
"    return protein"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'ACU is not present in the genetic code.'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-22-02b19746c2c2>\u001b[0m in \u001b[0;36mtranslate\u001b[0;34m(sequence, genetic_code)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mamino_acid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenetic_code\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcodon\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'ACU'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-23-a63d615a2bdf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtranslate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"AGGTACGTGGAACGTACGTGACCGATGGACCACACUCATTGAGTGTGTACACACACGTGTGTGTGACACAACAAC\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgenetic_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-22-02b19746c2c2>\u001b[0m in \u001b[0;36mtranslate\u001b[0;34m(sequence, genetic_code)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mamino_acid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenetic_code\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcodon\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcodon\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\" is not present in the genetic code.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mamino_acid\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'*'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mprotein\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'ACU is not present in the genetic code.'"
]
}
],
"source": [
"# NOTE: this sequence contains a 'U', so the codon 'ACU' reached in the reading\n",
"# frame is not a key of genetic_code and the call raises KeyError - presumably\n",
"# intentional, to show the error message produced by translate().\n",
"translate(\"AGGTACGTGGAACGTACGTGACCGATGGACCACACUCATTGAGTGTGTACACACACGTGTGTGTGACACAACAAC\", genetic_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment