Skip to content

Instantly share code, notes, and snippets.

@audy
Created April 11, 2023 06:43
Show Gist options
  • Save audy/03dc2558faca4fd841ce145cc72f2d3c to your computer and use it in GitHub Desktop.
Save audy/03dc2558faca4fd841ce145cc72f2d3c to your computer and use it in GitHub Desktop.
encode ascii text as DNA sequences
#!/usr/bin/env python3
from itertools import product
from typing import Dict, Tuple
import sys
def generate_codon_mapping(
    dna_alphabet="GATC",
    alphabet="|abcdefghijklmnopqrstuvwxyz.!?,;() 01234567890",
    codon_length=3,
) -> Dict[str, str]:
    """
    Return a dictionary mapping codon -> character.

    Every string of ``codon_length`` letters drawn from ``dna_alphabet`` is
    enumerated in ``itertools.product`` order and paired, position by
    position, with the characters of ``alphabet``. Codons left over once
    ``alphabet`` is exhausted stay unassigned. A character listed more than
    once in ``alphabet`` (the default lists ``0`` twice) simply receives more
    than one codon.

    ``|`` is reserved as a special character meaning START so that DNA
    sequences which have been concatenated together can be told apart.

    Raises:
        AssertionError: if there are fewer codons than characters.
    """
    codons = ["".join(letters) for letters in product(dna_alphabet, repeat=codon_length)]
    # Raised explicitly instead of via ``assert`` so the check still runs under
    # ``python -O``; without it zip() below would silently drop characters.
    if len(codons) < len(alphabet):
        raise AssertionError("We require more codons!")
    return dict(zip(codons, alphabet))
def encode(input_string: str) -> str:
    """
    Translate text into a DNA string, one codon per input character.

    The codon table from ``generate_codon_mapping()`` is inverted to get a
    character -> codon lookup. A character that has no codon raises
    ``KeyError``.
    """
    codon_for = {}
    # Later entries overwrite earlier ones, so a character that owns several
    # codons is encoded with the last one in the table.
    for codon, character in generate_codon_mapping().items():
        codon_for[character] = codon
    return "".join(codon_for[character] for character in input_string)
def decode(input_string: str) -> str:
    """
    Translate a DNA string back into text, three letters at a time.

    This is a raw transcription: the START character is decoded like any
    other symbol and receives no special handling. A chunk that is not in the
    codon table (including a short trailing chunk) raises ``KeyError``.
    """
    lookup = generate_codon_mapping()
    # Offsets 0, 3, 6, ... mark the first letter of each codon.
    starts = range(0, len(input_string), 3)
    return "".join(lookup[input_string[start : start + 3]] for start in starts)
def test_encode_decode():
    """Check that encoding a sample sentence and decoding it is lossless."""
    original = "|the (quick?) fox jumped over; the lazy! d0g."
    as_dna = encode(original)
    assert decode(as_dna) == original
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment