Skip to content

Instantly share code, notes, and snippets.

@damiankao
Last active January 28, 2021 00:09
Show Gist options
  • Save damiankao/81b6ebd123b9ccf98e0e47f1ddd3ddd5 to your computer and use it in GitHub Desktop.
Save damiankao/81b6ebd123b9ccf98e0e47f1ddd3ddd5 to your computer and use it in GitHub Desktop.
'''
Naive probabilistic approach to reverse-engineering the vaccine sequence:
1. For every combination of codon position, viral base, and encoded amino acid, calculate the probability of each base change.
2. Apply this probability to the viral sequence and generate a vaccine sequence.
3. Compare the generated vaccine sequence to the known vaccine sequence and check for % match.
This code is in reference to:
https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/
Requires python3
'''
from collections import Counter,defaultdict
import random
codon_comparison = [x.strip().split(',')[1:] for x in '''0,ATG,ATG
3,TTT,TTC
6,GTT,GTG
9,TTT,TTC
12,CTT,CTG
15,GTT,GTG
18,TTA,CTG
21,TTG,CTG
24,CCA,CCT
27,CTA,CTG
30,GTC,GTG
33,TCT,TCC
36,AGT,AGC
39,CAG,CAG
42,TGT,TGT
45,GTT,GTG
48,AAT,AAC
51,CTT,CTG
54,ACA,ACC
57,ACC,ACC
60,AGA,AGA
63,ACT,ACA
66,CAA,CAG
69,TTA,CTG
72,CCC,CCT
75,CCT,CCA
78,GCA,GCC
81,TAC,TAC
84,ACT,ACC
87,AAT,AAC
90,TCT,AGC
93,TTC,TTT
96,ACA,ACC
99,CGT,AGA
102,GGT,GGC
105,GTT,GTG
108,TAT,TAC
111,TAC,TAC
114,CCT,CCC
117,GAC,GAC
120,AAA,AAG
123,GTT,GTG
126,TTC,TTC
129,AGA,AGA
132,TCC,TCC
135,TCA,AGC
138,GTT,GTG
141,TTA,CTG
144,CAT,CAC
147,TCA,TCT
150,ACT,ACC
153,CAG,CAG
156,GAC,GAC
159,TTG,CTG
162,TTC,TTC
165,TTA,CTG
168,CCT,CCT
171,TTC,TTC
174,TTT,TTC
177,TCC,AGC
180,AAT,AAC
183,GTT,GTG
186,ACT,ACC
189,TGG,TGG
192,TTC,TTC
195,CAT,CAC
198,GCT,GCC
201,ATA,ATC
204,CAT,CAC
207,GTC,GTG
210,TCT,TCC
213,GGG,GGC
216,ACC,ACC
219,AAT,AAT
222,GGT,GGC
225,ACT,ACC
228,AAG,AAG
231,AGG,AGA
234,TTT,TTC
237,GAT,GAC
240,AAC,AAC
243,CCT,CCC
246,GTC,GTG
249,CTA,CTG
252,CCA,CCC
255,TTT,TTC
258,AAT,AAC
261,GAT,GAC
264,GGT,GGG
267,GTT,GTG
270,TAT,TAC
273,TTT,TTT
276,GCT,GCC
279,TCC,AGC
282,ACT,ACC
285,GAG,GAG
288,AAG,AAG
291,TCT,TCC
294,AAC,AAC
297,ATA,ATC
300,ATA,ATC
303,AGA,AGA
306,GGC,GGC
309,TGG,TGG
312,ATT,ATC
315,TTT,TTC
318,GGT,GGC
321,ACT,ACC
324,ACT,ACA
327,TTA,CTG
330,GAT,GAC
333,TCG,AGC
336,AAG,AAG
339,ACC,ACC
342,CAG,CAG
345,TCC,AGC
348,CTA,CTG
351,CTT,CTG
354,ATT,ATC
357,GTT,GTG
360,AAT,AAC
363,AAC,AAC
366,GCT,GCC
369,ACT,ACC
372,AAT,AAC
375,GTT,GTG
378,GTT,GTC
381,ATT,ATC
384,AAA,AAA
387,GTC,GTG
390,TGT,TGC
393,GAA,GAG
396,TTT,TTC
399,CAA,CAG
402,TTT,TTC
405,TGT,TGC
408,AAT,AAC
411,GAT,GAC
414,CCA,CCC
417,TTT,TTC
420,TTG,CTG
423,GGT,GGC
426,GTT,GTC
429,TAT,TAC
432,TAC,TAC
435,CAC,CAC
438,AAA,AAG
441,AAC,AAC
444,AAC,AAC
447,AAA,AAG
450,AGT,AGC
453,TGG,TGG
456,ATG,ATG
459,GAA,GAA
462,AGT,AGC
465,GAG,GAG
468,TTC,TTC
471,AGA,CGG
474,GTT,GTG
477,TAT,TAC
480,TCT,AGC
483,AGT,AGC
486,GCG,GCC
489,AAT,AAC
492,AAT,AAC
495,TGC,TGC
498,ACT,ACC
501,TTT,TTC
504,GAA,GAG
507,TAT,TAC
510,GTC,GTG
513,TCT,TCC
516,CAG,CAG
519,CCT,CCT
522,TTT,TTC
525,CTT,CTG
528,ATG,ATG
531,GAC,GAC
534,CTT,CTG
537,GAA,GAA
540,GGA,GGC
543,AAA,AAG
546,CAG,CAG
549,GGT,GGC
552,AAT,AAC
555,TTC,TTC
558,AAA,AAG
561,AAT,AAC
564,CTT,CTG
567,AGG,CGC
570,GAA,GAG
573,TTT,TTC
576,GTG,GTG
579,TTT,TTT
582,AAG,AAG
585,AAT,AAC
588,ATT,ATC
591,GAT,GAC
594,GGT,GGC
597,TAT,TAC
600,TTT,TTC
603,AAA,AAG
606,ATA,ATC
609,TAT,TAC
612,TCT,AGC
615,AAG,AAG
618,CAC,CAC
621,ACG,ACC
624,CCT,CCT
627,ATT,ATC
630,AAT,AAC
633,TTA,CTC
636,GTG,GTG
639,CGT,CGG
642,GAT,GAT
645,CTC,CTG
648,CCT,CCT
651,CAG,CAG
654,GGT,GGC
657,TTT,TTC
660,TCG,TCT
663,GCT,GCT
666,TTA,CTG
669,GAA,GAA
672,CCA,CCC
675,TTG,CTG
678,GTA,GTG
681,GAT,GAT
684,TTG,CTG
687,CCA,CCC
690,ATA,ATC
693,GGT,GGC
696,ATT,ATC
699,AAC,AAC
702,ATC,ATC
705,ACT,ACC
708,AGG,CGG
711,TTT,TTT
714,CAA,CAG
717,ACT,ACA
720,TTA,CTG
723,CTT,CTG
726,GCT,GCC
729,TTA,CTG
732,CAT,CAC
735,AGA,AGA
738,AGT,AGC
741,TAT,TAC
744,TTG,CTG
747,ACT,ACA
750,CCT,CCT
753,GGT,GGC
756,GAT,GAT
759,TCT,AGC
762,TCT,AGC
765,TCA,AGC
768,GGT,GGA
771,TGG,TGG
774,ACA,ACA
777,GCT,GCT
780,GGT,GGT
783,GCT,GCC
786,GCA,GCC
789,GCT,GCT
792,TAT,TAC
795,TAT,TAT
798,GTG,GTG
801,GGT,GGC
804,TAT,TAC
807,CTT,CTG
810,CAA,CAG
813,CCT,CCT
816,AGG,AGA
819,ACT,ACC
822,TTT,TTC
825,CTA,CTG
828,TTA,CTG
831,AAA,AAG
834,TAT,TAC
837,AAT,AAC
840,GAA,GAG
843,AAT,AAC
846,GGA,GGC
849,ACC,ACC
852,ATT,ATC
855,ACA,ACC
858,GAT,GAC
861,GCT,GCC
864,GTA,GTG
867,GAC,GAT
870,TGT,TGT
873,GCA,GCT
876,CTT,CTG
879,GAC,GAT
882,CCT,CCT
885,CTC,CTG
888,TCA,AGC
891,GAA,GAG
894,ACA,ACA
897,AAG,AAG
900,TGT,TGC
903,ACG,ACC
906,TTG,CTG
909,AAA,AAG
912,TCC,TCC
915,TTC,TTC
918,ACT,ACC
921,GTA,GTG
924,GAA,GAA
927,AAA,AAG
930,GGA,GGC
933,ATC,ATC
936,TAT,TAC
939,CAA,CAG
942,ACT,ACC
945,TCT,AGC
948,AAC,AAC
951,TTT,TTC
954,AGA,CGG
957,GTC,GTG
960,CAA,CAG
963,CCA,CCC
966,ACA,ACC
969,GAA,GAA
972,TCT,TCC
975,ATT,ATC
978,GTT,GTG
981,AGA,CGG
984,TTT,TTC
987,CCT,CCC
990,AAT,AAT
993,ATT,ATC
996,ACA,ACC
999,AAC,AAT
1002,TTG,CTG
1005,TGC,TGC
1008,CCT,CCC
1011,TTT,TTC
1014,GGT,GGC
1017,GAA,GAG
1020,GTT,GTG
1023,TTT,TTC
1026,AAC,AAT
1029,GCC,GCC
1032,ACC,ACC
1035,AGA,AGA
1038,TTT,TTC
1041,GCA,GCC
1044,TCT,TCT
1047,GTT,GTG
1050,TAT,TAC
1053,GCT,GCC
1056,TGG,TGG
1059,AAC,AAC
1062,AGG,CGG
1065,AAG,AAG
1068,AGA,CGG
1071,ATC,ATC
1074,AGC,AGC
1077,AAC,AAT
1080,TGT,TGC
1083,GTT,GTG
1086,GCT,GCC
1089,GAT,GAC
1092,TAT,TAC
1095,TCT,TCC
1098,GTC,GTG
1101,CTA,CTG
1104,TAT,TAC
1107,AAT,AAC
1110,TCC,TCC
1113,GCA,GCC
1116,TCA,AGC
1119,TTT,TTC
1122,TCC,AGC
1125,ACT,ACC
1128,TTT,TTC
1131,AAG,AAG
1134,TGT,TGC
1137,TAT,TAC
1140,GGA,GGC
1143,GTG,GTG
1146,TCT,TCC
1149,CCT,CCT
1152,ACT,ACC
1155,AAA,AAG
1158,TTA,CTG
1161,AAT,AAC
1164,GAT,GAC
1167,CTC,CTG
1170,TGC,TGC
1173,TTT,TTC
1176,ACT,ACA
1179,AAT,AAC
1182,GTC,GTG
1185,TAT,TAC
1188,GCA,GCC
1191,GAT,GAC
1194,TCA,AGC
1197,TTT,TTC
1200,GTA,GTG
1203,ATT,ATC
1206,AGA,CGG
1209,GGT,GGA
1212,GAT,GAT
1215,GAA,GAA
1218,GTC,GTG
1221,AGA,CGG
1224,CAA,CAG
1227,ATC,ATT
1230,GCT,GCC
1233,CCA,CCT
1236,GGG,GGA
1239,CAA,CAG
1242,ACT,ACA
1245,GGA,GGC
1248,AAG,AAG
1251,ATT,ATC
1254,GCT,GCC
1257,GAT,GAC
1260,TAT,TAC
1263,AAT,AAC
1266,TAT,TAC
1269,AAA,AAG
1272,TTA,CTG
1275,CCA,CCC
1278,GAT,GAC
1281,GAT,GAC
1284,TTT,TTC
1287,ACA,ACC
1290,GGC,GGC
1293,TGC,TGT
1296,GTT,GTG
1299,ATA,ATT
1302,GCT,GCC
1305,TGG,TGG
1308,AAT,AAC
1311,TCT,AGC
1314,AAC,AAC
1317,AAT,AAC
1320,CTT,CTG
1323,GAT,GAC
1326,TCT,TCC
1329,AAG,AAA
1332,GTT,GTC
1335,GGT,GGC
1338,GGT,GGC
1341,AAT,AAC
1344,TAT,TAC
1347,AAT,AAT
1350,TAC,TAC
1353,CTG,CTG
1356,TAT,TAC
1359,AGA,CGG
1362,TTG,CTG
1365,TTT,TTC
1368,AGG,CGG
1371,AAG,AAG
1374,TCT,TCC
1377,AAT,AAT
1380,CTC,CTG
1383,AAA,AAG
1386,CCT,CCC
1389,TTT,TTC
1392,GAG,GAG
1395,AGA,CGG
1398,GAT,GAC
1401,ATT,ATC
1404,TCA,TCC
1407,ACT,ACC
1410,GAA,GAG
1413,ATC,ATC
1416,TAT,TAT
1419,CAG,CAG
1422,GCC,GCC
1425,GGT,GGC
1428,AGC,AGC
1431,ACA,ACC
1434,CCT,CCT
1437,TGT,TGT
1440,AAT,AAC
1443,GGT,GGC
1446,GTT,GTG
1449,GAA,GAA
1452,GGT,GGC
1455,TTT,TTC
1458,AAT,AAC
1461,TGT,TGC
1464,TAC,TAC
1467,TTT,TTC
1470,CCT,CCA
1473,TTA,CTG
1476,CAA,CAG
1479,TCA,TCC
1482,TAT,TAC
1485,GGT,GGC
1488,TTC,TTT
1491,CAA,CAG
1494,CCC,CCC
1497,ACT,ACA
1500,AAT,AAT
1503,GGT,GGC
1506,GTT,GTG
1509,GGT,GGC
1512,TAC,TAT
1515,CAA,CAG
1518,CCA,CCC
1521,TAC,TAC
1524,AGA,AGA
1527,GTA,GTG
1530,GTA,GTG
1533,GTA,GTG
1536,CTT,CTG
1539,TCT,AGC
1542,TTT,TTC
1545,GAA,GAA
1548,CTT,CTG
1551,CTA,CTG
1554,CAT,CAT
1557,GCA,GCC
1560,CCA,CCT
1563,GCA,GCC
1566,ACT,ACA
1569,GTT,GTG
1572,TGT,TGC
1575,GGA,GGC
1578,CCT,CCT
1581,AAA,AAG
1584,AAG,AAA
1587,TCT,AGC
1590,ACT,ACC
1593,AAT,AAT
1596,TTG,CTC
1599,GTT,GTG
1602,AAA,AAG
1605,AAC,AAC
1608,AAA,AAA
1611,TGT,TGC
1614,GTC,GTG
1617,AAT,AAC
1620,TTC,TTC
1623,AAC,AAC
1626,TTC,TTC
1629,AAT,AAC
1632,GGT,GGC
1635,TTA,CTG
1638,ACA,ACC
1641,GGC,GGC
1644,ACA,ACC
1647,GGT,GGC
1650,GTT,GTG
1653,CTT,CTG
1656,ACT,ACA
1659,GAG,GAG
1662,TCT,AGC
1665,AAC,AAC
1668,AAA,AAG
1671,AAG,AAG
1674,TTT,TTC
1677,CTG,CTG
1680,CCT,CCA
1683,TTC,TTC
1686,CAA,CAG
1689,CAA,CAG
1692,TTT,TTT
1695,GGC,GGC
1698,AGA,CGG
1701,GAC,GAT
1704,ATT,ATC
1707,GCT,GCC
1710,GAC,GAT
1713,ACT,ACC
1716,ACT,ACA
1719,GAT,GAC
1722,GCT,GCC
1725,GTC,GTT
1728,CGT,AGA
1731,GAT,GAT
1734,CCA,CCC
1737,CAG,CAG
1740,ACA,ACA
1743,CTT,CTG
1746,GAG,GAA
1749,ATT,ATC
1752,CTT,CTG
1755,GAC,GAC
1758,ATT,ATC
1761,ACA,ACC
1764,CCA,CCT
1767,TGT,TGC
1770,TCT,AGC
1773,TTT,TTC
1776,GGT,GGC
1779,GGT,GGA
1782,GTC,GTG
1785,AGT,TCT
1788,GTT,GTG
1791,ATA,ATC
1794,ACA,ACC
1797,CCA,CCT
1800,GGA,GGC
1803,ACA,ACC
1806,AAT,AAC
1809,ACT,ACC
1812,TCT,AGC
1815,AAC,AAT
1818,CAG,CAG
1821,GTT,GTG
1824,GCT,GCA
1827,GTT,GTG
1830,CTT,CTG
1833,TAT,TAC
1836,CAG,CAG
1839,GAT,GAC
1842,GTT,GTG
1845,AAC,AAC
1848,TGC,TGT
1851,ACA,ACC
1854,GAA,GAA
1857,GTC,GTG
1860,CCT,CCC
1863,GTT,GTG
1866,GCT,GCC
1869,ATT,ATT
1872,CAT,CAC
1875,GCA,GCC
1878,GAT,GAT
1881,CAA,CAG
1884,CTT,CTG
1887,ACT,ACA
1890,CCT,CCT
1893,ACT,ACA
1896,TGG,TGG
1899,CGT,CGG
1902,GTT,GTG
1905,TAT,TAC
1908,TCT,TCC
1911,ACA,ACC
1914,GGT,GGC
1917,TCT,AGC
1920,AAT,AAT
1923,GTT,GTG
1926,TTT,TTT
1929,CAA,CAG
1932,ACA,ACC
1935,CGT,AGA
1938,GCA,GCC
1941,GGC,GGC
1944,TGT,TGT
1947,TTA,CTG
1950,ATA,ATC
1953,GGG,GGA
1956,GCT,GCC
1959,GAA,GAG
1962,CAT,CAC
1965,GTC,GTG
1968,AAC,AAC
1971,AAC,AAT
1974,TCA,AGC
1977,TAT,TAC
1980,GAG,GAG
1983,TGT,TGC
1986,GAC,GAC
1989,ATA,ATC
1992,CCC,CCC
1995,ATT,ATC
1998,GGT,GGC
2001,GCA,GCT
2004,GGT,GGA
2007,ATA,ATC
2010,TGC,TGC
2013,GCT,GCC
2016,AGT,AGC
2019,TAT,TAC
2022,CAG,CAG
2025,ACT,ACA
2028,CAG,CAG
2031,ACT,ACA
2034,AAT,AAC
2037,TCT,AGC
2040,CCT,CCT
2043,CGG,CGG
2046,CGG,AGA
2049,GCA,GCC
2052,CGT,AGA
2055,AGT,AGC
2058,GTA,GTG
2061,GCT,GCC
2064,AGT,AGC
2067,CAA,CAG
2070,TCC,AGC
2073,ATC,ATC
2076,ATT,ATT
2079,GCC,GCC
2082,TAC,TAC
2085,ACT,ACA
2088,ATG,ATG
2091,TCA,TCT
2094,CTT,CTG
2097,GGT,GGC
2100,GCA,GCC
2103,GAA,GAG
2106,AAT,AAC
2109,TCA,AGC
2112,GTT,GTG
2115,GCT,GCC
2118,TAC,TAC
2121,TCT,TCC
2124,AAT,AAC
2127,AAC,AAC
2130,TCT,TCT
2133,ATT,ATC
2136,GCC,GCT
2139,ATA,ATC
2142,CCC,CCC
2145,ACA,ACC
2148,AAT,AAC
2151,TTT,TTC
2154,ACT,ACC
2157,ATT,ATC
2160,AGT,AGC
2163,GTT,GTG
2166,ACC,ACC
2169,ACA,ACA
2172,GAA,GAG
2175,ATT,ATC
2178,CTA,CTG
2181,CCA,CCT
2184,GTG,GTG
2187,TCT,TCC
2190,ATG,ATG
2193,ACC,ACC
2196,AAG,AAG
2199,ACA,ACC
2202,TCA,AGC
2205,GTA,GTG
2208,GAT,GAC
2211,TGT,TGC
2214,ACA,ACC
2217,ATG,ATG
2220,TAC,TAC
2223,ATT,ATC
2226,TGT,TGC
2229,GGT,GGC
2232,GAT,GAT
2235,TCA,TCC
2238,ACT,ACC
2241,GAA,GAG
2244,TGC,TGC
2247,AGC,TCC
2250,AAT,AAC
2253,CTT,CTG
2256,TTG,CTG
2259,TTG,CTG
2262,CAA,CAG
2265,TAT,TAC
2268,GGC,GGC
2271,AGT,AGC
2274,TTT,TTC
2277,TGT,TGC
2280,ACA,ACC
2283,CAA,CAG
2286,TTA,CTG
2289,AAC,AAT
2292,CGT,AGA
2295,GCT,GCC
2298,TTA,CTG
2301,ACT,ACA
2304,GGA,GGG
2307,ATA,ATC
2310,GCT,GCC
2313,GTT,GTG
2316,GAA,GAA
2319,CAA,CAG
2322,GAC,GAC
2325,AAA,AAG
2328,AAC,AAC
2331,ACC,ACC
2334,CAA,CAA
2337,GAA,GAG
2340,GTT,GTG
2343,TTT,TTC
2346,GCA,GCC
2349,CAA,CAA
2352,GTC,GTG
2355,AAA,AAG
2358,CAA,CAG
2361,ATT,ATC
2364,TAC,TAC
2367,AAA,AAG
2370,ACA,ACC
2373,CCA,CCT
2376,CCA,CCT
2379,ATT,ATC
2382,AAA,AAG
2385,GAT,GAC
2388,TTT,TTC
2391,GGT,GGC
2394,GGT,GGC
2397,TTT,TTC
2400,AAT,AAT
2403,TTT,TTC
2406,TCA,AGC
2409,CAA,CAG
2412,ATA,ATT
2415,TTA,CTG
2418,CCA,CCC
2421,GAT,GAT
2424,CCA,CCT
2427,TCA,AGC
2430,AAA,AAG
2433,CCA,CCC
2436,AGC,AGC
2439,AAG,AAG
2442,AGG,CGG
2445,TCA,AGC
2448,TTT,TTC
2451,ATT,ATC
2454,GAA,GAG
2457,GAT,GAC
2460,CTA,CTG
2463,CTT,CTG
2466,TTC,TTC
2469,AAC,AAC
2472,AAA,AAA
2475,GTG,GTG
2478,ACA,ACA
2481,CTT,CTG
2484,GCA,GCC
2487,GAT,GAC
2490,GCT,GCC
2493,GGC,GGC
2496,TTC,TTC
2499,ATC,ATC
2502,AAA,AAG
2505,CAA,CAG
2508,TAT,TAT
2511,GGT,GGC
2514,GAT,GAT
2517,TGC,TGT
2520,CTT,CTG
2523,GGT,GGC
2526,GAT,GAC
2529,ATT,ATT
2532,GCT,GCC
2535,GCT,GCC
2538,AGA,AGG
2541,GAC,GAT
2544,CTC,CTG
2547,ATT,ATT
2550,TGT,TGC
2553,GCA,GCC
2556,CAA,CAG
2559,AAG,AAG
2562,TTT,TTT
2565,AAC,AAC
2568,GGC,GGA
2571,CTT,CTG
2574,ACT,ACA
2577,GTT,GTG
2580,TTG,CTG
2583,CCA,CCT
2586,CCT,CCT
2589,TTG,CTG
2592,CTC,CTG
2595,ACA,ACC
2598,GAT,GAT
2601,GAA,GAG
2604,ATG,ATG
2607,ATT,ATC
2610,GCT,GCC
2613,CAA,CAG
2616,TAC,TAC
2619,ACT,ACA
2622,TCT,TCT
2625,GCA,GCC
2628,CTG,CTG
2631,TTA,CTG
2634,GCG,GCC
2637,GGT,GGC
2640,ACA,ACA
2643,ATC,ATC
2646,ACT,ACA
2649,TCT,AGC
2652,GGT,GGC
2655,TGG,TGG
2658,ACC,ACA
2661,TTT,TTT
2664,GGT,GGA
2667,GCA,GCA
2670,GGT,GGC
2673,GCT,GCC
2676,GCA,GCT
2679,TTA,CTG
2682,CAA,CAG
2685,ATA,ATC
2688,CCA,CCC
2691,TTT,TTT
2694,GCT,GCT
2697,ATG,ATG
2700,CAA,CAG
2703,ATG,ATG
2706,GCT,GCC
2709,TAT,TAC
2712,AGG,CGG
2715,TTT,TTC
2718,AAT,AAC
2721,GGT,GGC
2724,ATT,ATC
2727,GGA,GGA
2730,GTT,GTG
2733,ACA,ACC
2736,CAG,CAG
2739,AAT,AAT
2742,GTT,GTG
2745,CTC,CTG
2748,TAT,TAC
2751,GAG,GAG
2754,AAC,AAC
2757,CAA,CAG
2760,AAA,AAG
2763,TTG,CTG
2766,ATT,ATC
2769,GCC,GCC
2772,AAC,AAC
2775,CAA,CAG
2778,TTT,TTC
2781,AAT,AAC
2784,AGT,AGC
2787,GCT,GCC
2790,ATT,ATC
2793,GGC,GGC
2796,AAA,AAG
2799,ATT,ATC
2802,CAA,CAG
2805,GAC,GAC
2808,TCA,AGC
2811,CTT,CTG
2814,TCT,AGC
2817,TCC,AGC
2820,ACA,ACA
2823,GCA,GCA
2826,AGT,AGC
2829,GCA,GCC
2832,CTT,CTG
2835,GGA,GGA
2838,AAA,AAG
2841,CTT,CTG
2844,CAA,CAG
2847,GAT,GAC
2850,GTG,GTG
2853,GTC,GTC
2856,AAC,AAC
2859,CAA,CAG
2862,AAT,AAT
2865,GCA,GCC
2868,CAA,CAG
2871,GCT,GCA
2874,TTA,CTG
2877,AAC,AAC
2880,ACG,ACC
2883,CTT,CTG
2886,GTT,GTC
2889,AAA,AAG
2892,CAA,CAG
2895,CTT,CTG
2898,AGC,TCC
2901,TCC,TCC
2904,AAT,AAC
2907,TTT,TTC
2910,GGT,GGC
2913,GCA,GCC
2916,ATT,ATC
2919,TCA,AGC
2922,AGT,TCT
2925,GTT,GTG
2928,TTA,CTG
2931,AAT,AAC
2934,GAT,GAT
2937,ATC,ATC
2940,CTT,CTG
2943,TCA,AGC
2946,CGT,AGA
2949,CTT,CTG
2952,GAC,GAC
2955,AAA,CCT
2958,GTT,CCT
2961,GAG,GAG
2964,GCT,GCC
2967,GAA,GAG
2970,GTG,GTG
2973,CAA,CAG
2976,ATT,ATC
2979,GAT,GAC
2982,AGG,AGA
2985,TTG,CTG
2988,ATC,ATC
2991,ACA,ACA
2994,GGC,GGC
2997,AGA,AGA
3000,CTT,CTG
3003,CAA,CAG
3006,AGT,AGC
3009,TTG,CTC
3012,CAG,CAG
3015,ACA,ACA
3018,TAT,TAC
3021,GTG,GTG
3024,ACT,ACC
3027,CAA,CAG
3030,CAA,CAG
3033,TTA,CTG
3036,ATT,ATC
3039,AGA,AGA
3042,GCT,GCC
3045,GCA,GCC
3048,GAA,GAG
3051,ATC,ATT
3054,AGA,AGA
3057,GCT,GCC
3060,TCT,TCT
3063,GCT,GCC
3066,AAT,AAT
3069,CTT,CTG
3072,GCT,GCC
3075,GCT,GCC
3078,ACT,ACC
3081,AAA,AAG
3084,ATG,ATG
3087,TCA,TCT
3090,GAG,GAG
3093,TGT,TGT
3096,GTA,GTG
3099,CTT,CTG
3102,GGA,GGC
3105,CAA,CAG
3108,TCA,AGC
3111,AAA,AAG
3114,AGA,AGA
3117,GTT,GTG
3120,GAT,GAC
3123,TTT,TTT
3126,TGT,TGC
3129,GGA,GGC
3132,AAG,AAG
3135,GGC,GGC
3138,TAT,TAC
3141,CAT,CAC
3144,CTT,CTG
3147,ATG,ATG
3150,TCC,AGC
3153,TTC,TTC
3156,CCT,CCT
3159,CAG,CAG
3162,TCA,TCT
3165,GCA,GCC
3168,CCT,CCT
3171,CAT,CAC
3174,GGT,GGC
3177,GTA,GTG
3180,GTC,GTG
3183,TTC,TTT
3186,TTG,CTG
3189,CAT,CAC
3192,GTG,GTG
3195,ACT,ACA
3198,TAT,TAT
3201,GTC,GTG
3204,CCT,CCC
3207,GCA,GCT
3210,CAA,CAA
3213,GAA,GAG
3216,AAG,AAG
3219,AAC,AAT
3222,TTC,TTC
3225,ACA,ACC
3228,ACT,ACC
3231,GCT,GCT
3234,CCT,CCA
3237,GCC,GCC
3240,ATT,ATC
3243,TGT,TGC
3246,CAT,CAC
3249,GAT,GAC
3252,GGA,GGC
3255,AAA,AAA
3258,GCA,GCC
3261,CAC,CAC
3264,TTT,TTT
3267,CCT,CCT
3270,CGT,AGA
3273,GAA,GAA
3276,GGT,GGC
3279,GTC,GTG
3282,TTT,TTC
3285,GTT,GTG
3288,TCA,TCC
3291,AAT,AAC
3294,GGC,GGC
3297,ACA,ACC
3300,CAC,CAT
3303,TGG,TGG
3306,TTT,TTC
3309,GTA,GTG
3312,ACA,ACA
3315,CAA,CAG
3318,AGG,CGG
3321,AAT,AAC
3324,TTT,TTC
3327,TAT,TAC
3330,GAA,GAG
3333,CCA,CCC
3336,CAA,CAG
3339,ATC,ATC
3342,ATT,ATC
3345,ACT,ACC
3348,ACA,ACC
3351,GAC,GAC
3354,AAC,AAC
3357,ACA,ACC
3360,TTT,TTC
3363,GTG,GTG
3366,TCT,TCT
3369,GGT,GGC
3372,AAC,AAC
3375,TGT,TGC
3378,GAT,GAC
3381,GTT,GTC
3384,GTA,GTG
3387,ATA,ATC
3390,GGA,GGC
3393,ATT,ATT
3396,GTC,GTG
3399,AAC,AAC
3402,AAC,AAT
3405,ACA,ACC
3408,GTT,GTG
3411,TAT,TAC
3414,GAT,GAC
3417,CCT,CCT
3420,TTG,CTG
3423,CAA,CAG
3426,CCT,CCC
3429,GAA,GAG
3432,TTA,CTG
3435,GAC,GAC
3438,TCA,AGC
3441,TTC,TTC
3444,AAG,AAA
3447,GAG,GAG
3450,GAG,GAA
3453,TTA,CTG
3456,GAT,GAC
3459,AAA,AAG
3462,TAT,TAC
3465,TTT,TTT
3468,AAG,AAG
3471,AAT,AAC
3474,CAT,CAC
3477,ACA,ACA
3480,TCA,AGC
3483,CCA,CCC
3486,GAT,GAC
3489,GTT,GTG
3492,GAT,GAC
3495,TTA,CTG
3498,GGT,GGC
3501,GAC,GAT
3504,ATC,ATC
3507,TCT,AGC
3510,GGC,GGA
3513,ATT,ATC
3516,AAT,AAT
3519,GCT,GCC
3522,TCA,AGC
3525,GTT,GTC
3528,GTA,GTG
3531,AAC,AAC
3534,ATT,ATC
3537,CAA,CAG
3540,AAA,AAA
3543,GAA,GAG
3546,ATT,ATC
3549,GAC,GAC
3552,CGC,CGG
3555,CTC,CTG
3558,AAT,AAC
3561,GAG,GAG
3564,GTT,GTG
3567,GCC,GCC
3570,AAG,AAG
3573,AAT,AAT
3576,TTA,CTG
3579,AAT,AAC
3582,GAA,GAG
3585,TCT,AGC
3588,CTC,CTG
3591,ATC,ATC
3594,GAT,GAC
3597,CTC,CTG
3600,CAA,CAA
3603,GAA,GAA
3606,CTT,CTG
3609,GGA,GGG
3612,AAG,AAG
3615,TAT,TAC
3618,GAG,GAG
3621,CAG,CAG
3624,TAT,TAC
3627,ATA,ATC
3630,AAA,AAG
3633,TGG,TGG
3636,CCA,CCC
3639,TGG,TGG
3642,TAC,TAC
3645,ATT,ATC
3648,TGG,TGG
3651,CTA,CTG
3654,GGT,GGC
3657,TTT,TTT
3660,ATA,ATC
3663,GCT,GCC
3666,GGC,GGA
3669,TTG,CTG
3672,ATT,ATT
3675,GCC,GCC
3678,ATA,ATC
3681,GTA,GTG
3684,ATG,ATG
3687,GTG,GTC
3690,ACA,ACA
3693,ATT,ATC
3696,ATG,ATG
3699,CTT,CTG
3702,TGC,TGT
3705,TGT,TGC
3708,ATG,ATG
3711,ACC,ACC
3714,AGT,AGC
3717,TGC,TGC
3720,TGT,TGT
3723,AGT,AGC
3726,TGT,TGC
3729,CTC,CTG
3732,AAG,AAG
3735,GGC,GGC
3738,TGT,TGT
3741,TGT,TGT
3744,TCT,AGC
3747,TGT,TGT
3750,GGA,GGC
3753,TCC,AGC
3756,TGC,TGC
3759,TGC,TGC
3762,AAA,AAG
3765,TTT,TTC
3768,GAT,GAC
3771,GAA,GAG
3774,GAC,GAC
3777,GAC,GAT
3780,TCT,TCT
3783,GAG,GAG
3786,CCA,CCC
3789,GTG,GTG
3792,CTC,CTG
3795,AAA,AAG
3798,GGA,GGC
3801,GTC,GTG
3804,AAA,AAA
3807,TTA,CTG
3810,CAT,CAC
3813,TAC,TAC
3816,ACA,ACA
3819,TAA,TGA'''.strip().split('\n')]
# Codon -> one-letter amino-acid lookup covering all 64 DNA codons.
# The three stop codons (TAA, TAG, TGA) map to '_'.
aa = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '_', 'TAG': '_',
    'TGC': 'C', 'TGT': 'C', 'TGA': '_', 'TGG': 'W',
}
def delta_codon(viral, vaccine):
    """Return the positions at which two codons differ.

    Args:
        viral: Reference codon (any indexable sequence of bases).
        vaccine: Codon to compare against; must be at least as long as
            ``viral``.

    Returns:
        A list of ``(index, vaccine_base)`` tuples, one for every position
        where ``vaccine`` differs from ``viral``, in ascending index order.
        The list is empty when the codons are identical.

    Raises:
        IndexError: If ``vaccine`` is shorter than ``viral``.
    """
    return [
        (i, vaccine[i])
        for i, vr_base in enumerate(viral)
        if vr_base != vaccine[i]
    ]
# Tally, for every (codon position, viral base, viral amino acid) context,
# how often it occurs ('all') and how often each replacement base shows up at
# that position in the corresponding vaccine codon.
cases = defaultdict(lambda: defaultdict(int))
for viral_codon, vaccine_codon in codon_comparison:
    vr_aa = aa[viral_codon]
    for i, base in enumerate(viral_codon):
        cases[(i, base, vr_aa)]['all'] += 1
    for i, vc_base in delta_codon(viral_codon, vaccine_codon):
        cases[(i, viral_codon[i], vr_aa)][vc_base] += 1

# Convert the tallies into per-context substitution probabilities. Contexts
# that were never changed (only an 'all' entry) get no entry in ``probs``.
probs = defaultdict(lambda: defaultdict(float))
for key, case in cases.items():
    if len(case) > 1:
        case_total = float(case['all'])  # same for every variant of this context
        for variant, count in case.items():
            if variant != 'all':
                probs[key][variant] = count / case_total
def simulate(pr, codons=None, codon_table=None):
    """Generate a candidate sequence by randomly substituting bases.

    Every base of every codon is looked up by its context
    ``(position in codon, base, amino acid of the codon)``. If ``pr`` has an
    entry for that context, the base is replaced by a random draw among the
    listed replacement bases (weighted by their probabilities) and the
    original base (weighted by the remaining probability mass). Each base is
    drawn independently of the other bases in its codon.

    Args:
        pr: Mapping ``(index, base, amino_acid) -> {replacement_base: prob}``.
        codons: Iterable of input codons. Defaults to the viral codons of the
            module-level ``codon_comparison`` table.
        codon_table: Mapping codon -> amino acid. Defaults to the
            module-level ``aa`` table.

    Returns:
        A list with one generated codon string per input codon.

    Raises:
        KeyError: If an input codon is missing from the codon table.
    """
    if codons is None:
        codons = [pair[0] for pair in codon_comparison]
    if codon_table is None:
        codon_table = aa

    new_codons = []
    for codon in codons:
        vr_aa = codon_table[codon]
        new_bases = []
        for i, base in enumerate(codon):
            key = (i, base, vr_aa)
            new_base = base
            if key in pr:
                choices = list(pr[key].keys())
                weights = list(pr[key].values())
                # The leftover probability mass means "keep the base".
                # Clamp it at zero: float rounding can push the sum a hair
                # above 1, and random.choices assumes non-negative weights.
                choices.append(base)
                weights.append(max(0.0, 1 - sum(weights)))
                new_base = random.choices(choices, weights=weights, k=1)[0]
            new_bases.append(new_base)
        new_codons.append(''.join(new_bases))
    return new_codons
# Known vaccine sequence: the vaccine column of the comparison table, concatenated.
vaccine_seq = ''.join(pair[1] for pair in codon_comparison)
# One randomly generated sequence, drawn using the substitution probabilities.
simulate_seq = ''.join(simulate(probs))
def match_percentage(a, b):
    """Return the percentage of positions in ``a`` that match ``b``.

    Args:
        a: Reference sequence; its length is the denominator.
        b: Sequence compared position by position against ``a``. Elements of
            ``b`` beyond ``len(a)`` are ignored.

    Returns:
        A float between 0 and 100. An empty ``a`` yields ``0.0`` instead of
        dividing by zero.

    Raises:
        IndexError: If ``b`` is shorter than ``a``.
    """
    if len(a) == 0:
        return 0.0
    matches = sum(1 for i, a_base in enumerate(a) if a_base == b[i])
    return float(matches) / len(a) * 100
# Score the generated sequence against the known one, then print a header
# line starting with '>' followed by the generated sequence itself.
mp = match_percentage(vaccine_seq, simulate_seq)
print(f'>generated vaccine sequence, {mp}% match with known vaccine sequence')
print(simulate_seq)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment