Skip to content

Instantly share code, notes, and snippets.

@MatrixManAtYrService
Last active August 25, 2023 04:39
Show Gist options
  • Save MatrixManAtYrService/ed387dd968dc51ea3e36e41062529692 to your computer and use it in GitHub Desktop.
Save MatrixManAtYrService/ed387dd968dc51ea3e36e41062529692 to your computer and use it in GitHub Desktop.
Gene Structure
GCAAAGGCTCACCCAAGGCTCCGAATCACACTATCTGAGCAACATTTCAAGACACTCTGCAACACCACGCCAGCCTCGGAAAAGCGTGCAAATCTGCTGTGAAGATGCTCAGGCCTTCATTGCCAAAGACTCTGACGGCTTTTGGCCCGCCTTAACCAGCTGGGTGGCCGGCCCGGACAGTGCCCGTCGCGGATCTCTGCATGGCTTCAAACCAGAAAGGTGCTCTCAACGTCGGGCAACAGTGAGGTATGCGACTCCCCTTGCATCACATGGACTTGTCCTGGGAGGCCTTTTCTTGGGCCGTTCATTGCGGAGATGTCGGCAAGGTTCTCTGCAAGACCGTGTCAGCCACGGCCCTTGCGAGGCTGAGTTGAGGTCTGGCTATCTGTCTCGAGATGGACCTCACTGGGTCGTCATGGCTGGCTCGAGGCCTTGCTGGCTGGGCCTTTGCCTGGAGCTTCATGATATTGCCAAGCCTCGGAGCTCCCTTCGAATTGGTCAAGTGGTCTGCTTGGCTCGCCGTACCCAAGGTTACCCCAAGGTACCTACCTACCTACCTACACTAACCTACCTACCCTCCCTCTGCGGAGGAGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCAGCAACAGCGACAAATCAGCACACACGGCAAGCAAAGCACCCACGGGAGCTGGCTGTTAGCTGACTGAGCTCGAGCTTCCATCCCTTCGCTTAGCTTGGACGTCGGCACCTTCCCTCCTGTACGGACGCATGTCTCAGGCGCGGTTGCGCTCAAACACGGCGGGCAATCACCTGGAAGGACACAACATCAAAGGGCCCATCAGTGGCATGCATCTTCGGCCCTTCTGAATTTTCGAGCGTCTCCTGGCGCCAACTTCAACACCAAACCTCGTCGCGATCATCCTTCCGGCCAAAACACCACTTTTCCATCCGCCAACCCCGCTTCATCCACTTCCACCAGTTCCACGCAAATCCCGCGCCGTTGCCCCGTCAATATCAACATCCAGCATCGTCACGACCACCGGTTTTATTCTCGACACTACGTCCATCCGCCCATCCCGCGCACAACCATCCCACCCCGTTCAATCCCTTCAAACAAACAAACAAACACAATGGCTGGCGGAAAGGGCAAGTCGTCTGGCGGCAAGAGCTCGGGTGGAAAGACCTCGGGCGTCGAAGGTCCCAAGAAGCAGCAGAGCCATTCTGCTCGCGCTGGTCTTCAGGTACGTTTTTATAGTTGACGAGCTTCATGGCTCTGTCCGCTTTCAATTCCTGCTCCGATCTGCACCATCGACGTCGTCTCATCAACCAACTTTTTTTTTGCTCGCATCCGCCATCCCTGTCGTCAGATGTCGATGGAATCACGCCGATACACCACAACGCGTCCACCGAGCCCCCTACATTTCCGGACAACCCCATAGTGCCAACCTACGAAGAACGCGAACTCGCTGCCTCACACGTCGGTCGCACACAACCCCGCCGCTGGGTAAAAGAAAAAAACCCCCATTGCGCCCCGTACGCGATACTATTTCGGCTTCATATGGACGCGTTGTTCTGTGCATCCTTCTCTCGCCCTGGCCCCGCACCCGGACCTTGTTCCGGTCCCTAACTTTGGATTTCTCGCTCTGACAACGAGGCGGAGCTGCGGCGTGCATTTCCGTCCTCTGGAGTTTGCCGTGTGTCTGATGCGCCATGCTGACTTGAATCTAATTAGTTCCCTTGCGGTCGTGTCAAGCGTTTCCTGAAGCAGAACACCCAAAACAAGATGCGCGTTGGCGCCAAGGCTGCCGTCTATGTTACTGCCGTGCTGGAATATTTGACTGCTGAAGTCCTCGAACTTGCGGGTGTAAGTCATTCATACCCCATCCGCTCGGCCATATTGCTTGGCGACGAGGAGGCTCCCATTTCCAAGTTTTACCTGCTTTATGGCTAACTCAATGATTTCCGTTGTATAGAACGCTGCCAAGGACCTGAAGGTCAAGCGTATTACTCC
CCGCCATCTTCAGCTTGCCATCCGTGGAGATGAGGAGCTTGATACCCTCATCCGCGCCACTATCGCCTTCGGTGGTGTCCTCCCCCACATTAACCGCGCCCTTCTGCTCAAGGTAGAGCAGAAGAAGAAGGCCAAGGCTCAGGAGGCTTAAGTCCACAACTGCACCGAAAAACTCGACGTCGCACACGTTTCGCACACTGTCTTGCATCTCTCTAGCCCCGCTCCGGGCTGACGGGCAGGACAGCAATCGGCTTGTTCAAGGGATGTCACGGGGTGATGCCGGGGGGGTTGGGGATTGTCTTCGCTAATCCAACACTAACGCCGCTGTATTGTTGGGTATTTTCTTCCTATTATTTGTTGTTGGGATACGCACACGGGCTTGGGGCCCACCGAACCATGGCTTGAAAAAGCCGAGCACTCCCGGTGTCTGCGACGGCAACAAGCGCAGCGCATAGACGACGGCGAACTTCCCTTGCGACACTTCCAGCAGAGTGGTGCGTTCGGAACACACACGAAAAGCGAAGCGTTCTGTATCATCCAGACGTGGAAGCCGCTTGGGTGGCGATGAGCAGGGAGACCATTCACTGTATAATGTCATCTTGGAGTGCACCACACACTGGGAACGGCGAGCAATCTCCCCTCGTCACGGCGCGGCGTGCGACAGCAGCTCTTTTACGATATCTGTAACAACATGAATCTTCTTTATCGTTTTTTTTATAGTACGACTCGGCCATGAGGGCTTAGCGAGGAAAACATCAGGACACGTTAAATGAACGTGTCGGTTGGTACTGGTTGCAGGTAGTGGCGGAATAAAGGCTACAAGGAAGAGCCTTGAAAATTTGCGATACCAATTCTTATTATGTTCTCTCTACAGTCTGGGATGCGCAATTCTATTCATGGTGACTACAATGTGTTCAGTCTCACCATCTGCTGTTTCGGACGACACTGATACCCGCTGCCGATTACTACATGAGAAAACGCAGGTAAGGTAGATCCAAGTATTGTTGTCATTCTAGAAGCTAACTTGGTGAACTCTCTTTAAGCAAGACCTCTATGCCCTACCGATCCATGACCATGGAGTTTTCGTACCTATAACAAGACGATATGTGTCATTCAGAGCTTTGATCCTCTACCTTTCAAGGTTTTTACTTCCCGGACAATTGATCACGTTTGCATGGCCACATCATTCTCCATAGGCCCGAATTAAATGAATGAGGCAAACGACTCGTAAATGTCCGGGTTAGCGATTCCCACAGCGAGACGGCCTCCAGCCAAGGGCGACACCGGCGAAACTGAAATGGTCTCCAGATCGTCCTTCAACAGCTTCGCAGGATCAATAGTCATCTCATCCGCAACCATGGTAATGTAACAGTCCTGGCGAGAACTGCAGCCGTCCAGCCTAGAATAGGGCTCAAGCGCGGCAACCGTGAGGTTGCCGGTGTTGTCATCCTCCGGGTCCTTCTGCGTCCCTTGTGCTGCTATCATGGTCTCCTTCCAATCGAACTTGATAGTCGCCTCCTTGACAAGCAACTGTTCATCTTCCGAACCCGAGGCGTCTGAAGAAGAGCTACTGCTGCCACCAGCAGAGGAGGTCGACGATGACGAAGAAGAGTCATCACTTTTTTTCTCCCCGACACTCCCACTCCCATTTCCCTCGTGTCCCCTTCCCTTCCTCCCACTACTTCTCTGTCCACCCATCCTCCTCTCCCTTTCTTCCTGTCCACTCCTACTCCTCCTCGCTCCCAACGCCTTCTTCATAGTCTCCCTCTCCCTCTCCCTTTCCTTACGCCTCTGAGCCTCGATGGCGTCCTTATCCCTATCATTCACCCACACCAATTTCTTGGCATTCCTGTCAATCTTAGCACGACCTTCTTCGCACGCAGCGATGACAGTCTGTCGACACTCGGACGGGTAGTCGCAGTGAGACCAGCAGTTGTGGGTGCGTTCGACGTACATCTTTTCTTTGGCCCGGGCGATGGAGCGGGTCTCGCGCTCAGAGA
CGTGATGCCAGGCTAGGGTTGTAGCTCTAGGCGATTTTGAGCCTGGGCCTGGGGGTGAAGATGAAGACGAACTTGAAGATGAGTCTAAGGGGGGGACGGGGATGAGATGCCATGTTGCAAAGTCTTGGAGGCGAGCGGCGGCGACTTTGTCATTGGCAGATTGGGATGAGTCCGAGTCAGAGGTGGTGGTGGTATCTTTTCTATTAGAGCTGGAAGAAAGGGAGGGACGAGGAGAGAGAGAAACACTGGTGAGTCTCCTCTTCCCACTCGATCCCAACTTCTTCTTTCCACTATCATCATCATCATCATCATCATCATCATCACCACCACCACCAACTGTCAAAGCACGCCTGGCAGCAGCACCCGTCACACTCCGTCGAAAGCTTCCCCACGCCGACCAACCCAAGTAATCAAACTCGGCGGTGCACGGTCCTCCTCCTTTTCTGCCACTTCCGCTTCGCTTTCTTTTCTTTCCAGTCTTGTCTCTCTCTTCAGATTCCCTTTGATTAATGGCAGGCATTAAACAGCAAAAGACATGTCCACACTCCAGGCATCTCCTCGTGCAGGCTAGCCGGTATACAGACCGGCAGGAGTGGCAGGACCACAGCCAGGGGTAGGGAGCGGGAGGCGAGAGCATGGGCGGCGTTGTTGGTGGTGTTGGAGACTTTTGGGTATTGCTGCTTGATGACGTACTTCTGTTGGTAATTGGACTTGTGGGTGACGGCTGAGTTGGTGAAAATTTGGCTGTGTTTTTGTTGTCGGTGGTGGTGGTGCCACTACCATCAAAAGTATCAGCGATGGGCGAGTTGGGACTGTTTTGAGGCTTGACTTTTGTGGGGGAGTTGATCATCTCTGAGATAGAGCCGGTGACAATGGTTAGCTCTGGGATGATGTCGAAGAGGACGAGCCTGGGTGGCTTTGCCTTTGCATTAGGTGTTGAGGGCATCTTGTTTGGACTGTTGTGTGTCCTTGTTGAGTTG
from pprint import pprint
from dataclasses import dataclass
from enum import Enum, auto
from time import sleep
from rich import print
from rich.text import Text
from rich.style import Style
from rich.console import Console
class Role(Enum):
    """The part a stretch of DNA plays in the annotated output."""

    # Text seen before a start codon while not translating, and whatever
    # is left in the buffer when the sequence ends.
    untranslated = auto()
    # The codon that opens a reading frame.
    start_codon = auto()
    # A codon that closes a reading frame.
    stop_codon = auto()
    # Text found between a stop codon and the next start codon.
    intron = auto()
    # Text found between a start codon and the stop codon that follows it.
    for_translation = auto()
class Mode(Enum):
    """State of the scanner as it walks along the sequence."""

    # Initial state: no start codon has been seen yet.
    not_translating = auto()
    # Entered after a stop codon; text gathered here is labelled an intron
    # once the next start codon shows up.
    excluding_intron = auto()
    # Entered after a start codon; text gathered here is for translation.
    reading = auto()
@dataclass
class Region:
dna: str
role: Role
def styled_text(self) -> Text:
text = Text(self.dna)
if self.role == Role.start_codon:
text.stylize(style=Style(bgcolor="green"), start=0, end=len(self.dna))
elif self.role == Role.stop_codon:
text.stylize(style=Style(bgcolor="red"), start=0, end=len(self.dna))
elif self.role == Role.intron:
text.stylize(style="underline", start=0, end=len(self.dna))
elif self.role == Role.untranslated:
text.stylize(style="italic", start=0, end=len(self.dna))
return text
# Load the sequence to annotate.  All whitespace is removed: the scanner
# consumes the text one character at a time, so a line break left in the
# middle of the sequence would hide any codon that straddles it and would
# end up embedded in the emitted regions.
with open('genome.txt', encoding='utf-8') as f:
    dna = ''.join(f.read().split())

# Regions emitted so far, in sequence order.
regions: list[Region] = []
# Characters read since the last emitted region.
buffer: list[str] = []
def got_start_codon(codon: str) -> None:
    """Handle a start codon found at the end of ``buffer``.

    While already reading, the codon is ignored and stays in the buffer.
    Otherwise the buffered text in front of the codon is recorded as an
    untranslated region (if nothing was being translated yet) or as an
    intron (if a stop codon was seen earlier), followed by a region for
    the codon itself.  The buffer is then emptied and the scanner
    switches to reading mode.

    Args:
        codon: The three-character start codon that was just matched.
    """
    global buffer, mode

    # Already inside a reading frame: nothing to record.
    if mode == Mode.reading:
        return

    if mode == Mode.not_translating:
        role = Role.untranslated
    elif mode == Mode.excluding_intron:
        role = Role.intron

    # Everything buffered except the codon itself (its last three chars).
    preceding = ''.join(buffer[:-3])
    regions.append(Region(preceding, role))
    regions.append(Region(codon, Role.start_codon))

    buffer = []
    mode = Mode.reading
def got_stop_codon(codon: str) -> None:
    """Handle a stop codon found at the end of ``buffer``.

    Outside reading mode the codon is ignored and stays in the buffer.
    While reading, the buffered text in front of the codon is recorded as
    a for-translation region, followed by a region for the codon itself;
    the buffer is then emptied and the scanner switches to
    ``Mode.excluding_intron``.

    Args:
        codon: The three-character stop codon that was just matched.
    """
    global buffer, mode

    # A stop codon only matters when a reading frame is open.
    if mode != Mode.reading:
        return

    # Everything buffered except the codon itself (its last three chars).
    translated = ''.join(buffer[:-3])
    regions.append(Region(translated, Role.for_translation))
    regions.append(Region(codon, Role.stop_codon))

    buffer = []
    mode = Mode.excluding_intron
# start not translating
mode = Mode.not_translating
for nucleotide in dna:
buffer.append(nucleotide)
if len(buffer) > 3:
last3 = ''.join(buffer[-3:])
match last3:
case 'ATG': # start codon
got_start_codon(last3)
case 'TAA' | 'TAG' | 'TGA': # end codon
got_stop_codon(last3)
# unless the sequence terminated with a codon,
# mark the remainder as untranslated
if buffer:
regions.append(Region(''.join(buffer), Role.untranslated))
console = Console()

# Start codons and the text to be translated are printed without a line
# break, so each start codon begins a line and its reading frame follows
# on that same line; every other region ends the current line.
for region in regions:
    console.print(
        region.styled_text(),
        end='' if region.role in (Role.start_codon, Role.for_translation) else '\n',
    )
    # NOTE(review): the source lost its indentation; the pause is assumed
    # to run once per region -- confirm.
    sleep(0.15)  # just so the asciicast can see it all
# Create a Console instance to record content for HTML export
html_console = Console(force_terminal=True, record=True)

# Same layout as the terminal output: start codons and the text to be
# translated stay on the current line, every other region ends it.
for region in regions:
    html_console.print(
        region.styled_text(),
        end='' if region.role in (Role.start_codon, Role.for_translation) else '\n',
    )

# Save the recorded content to an HTML file.  The encoding is explicit
# because the exported document declares itself as UTF-8; relying on the
# platform default could write bytes that contradict that declaration.
with open("genome.html", "wt", encoding="utf-8") as html_file:
    html_file.write(html_console.export_html())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment