Skip to content

Instantly share code, notes, and snippets.

@dawedawe
Created April 28, 2021 18:36
Show Gist options
  • Save dawedawe/bdfcc6285557ead6da102752dc704798 to your computer and use it in GitHub Desktop.
Save dawedawe/bdfcc6285557ead6da102752dc704798 to your computer and use it in GitHub Desktop.
The (incorrect) central dogma of molecular biology (DNA -> RNA -> protein) modeled in F#
namespace Dogma
module DNA =

    /// The four nucleobases that occur in DNA.
    type Nucleobases =
        | A // Adenine, a purine
        | C // Cytosine, a pyrimidine
        | G // Guanine, a purine
        | T // Thymine, a pyrimidine

    /// A triplet of DNA bases.
    type Codon = (Nucleobases * Nucleobases * Nucleobases)

    /// Returns the pairing partner of `nucleobase`: A pairs with T, C pairs with G.
    let complement nucleobase =
        match nucleobase with
        | T -> A
        | G -> C
        | C -> G
        | A -> T

    /// Complements every base of `sequence`, keeping the original order.
    let complementSeq sequence = sequence |> Seq.map complement

    /// Reads a single character as a DNA base, ignoring case.
    /// Returns `None` for any character other than A, C, G or T.
    let parse c =
        let upper = System.Char.ToUpper(c)

        match upper with
        | 'A' -> Some A
        | 'C' -> Some C
        | 'G' -> Some G
        | 'T' -> Some T
        | _ -> None

    /// Converts a string into an array of DNA bases.
    /// Characters that `parse` does not recognise are silently skipped.
    let parseSeq (s: string) = Array.choose parse (s.ToCharArray())
module RNA =

    /// The four nucleobases that occur in RNA.
    type Nucleobases =
        | A // Adenine, a Purine
        | C // Cytosine, a Pyrimidine
        | G // Guanine, a Purine
        | U // Uracil, a Pyrimidine

    /// A triplet of RNA bases.
    type Codon = (Nucleobases * Nucleobases * Nucleobases)

    /// Returns the pairing partner of `nucleobase`: A pairs with U, C pairs with G.
    let complement (nucleobase: Nucleobases) =
        match nucleobase with
        | A -> U
        | C -> G
        | G -> C
        | U -> A
module Transcription =

    /// Maps one DNA base to its complementary RNA base.
    let dnaToRna dna =
        match dna with
        | DNA.T -> RNA.A
        | DNA.G -> RNA.C
        | DNA.C -> RNA.G
        | DNA.A -> RNA.U // RNA has Uracil in place of DNA Thymine

    /// Transcribes a whole DNA sequence base by base, keeping the original order.
    let dnaSeqToRnaSeq (dna: seq<DNA.Nucleobases>) = Seq.map dnaToRna dna
module Translation =

    /// Groups a sequence into consecutive, non-overlapping triplets.
    /// The input is copied into an array as soon as this function is called;
    /// up to two trailing elements that do not fill a whole triplet are dropped.
    let seqToCodons (s: seq<'T>) =
        let items = Seq.toArray s

        seq {
            // The last valid start index is Length - 3, so an incomplete tail is never indexed.
            for start in 0 .. 3 .. items.Length - 3 do
                yield (items.[start], items.[start + 1], items.[start + 2])
        }

    /// The twenty amino acids produced by the codon tables in this module.
    type AminoAcid =
        | Alanine
        | Arginine
        | Asparagine
        | AsparticAcid
        | Cysteine
        | Glutamine
        | GlutamicAcid
        | Glycine
        | Histidine
        | Isoleucine
        | Leucine
        | Lysine
        | Methionine
        | Phenylalanine
        | Proline
        | Serine
        | Threonine
        | Tryptophan
        | Tyrosine
        | Valine

    /// Returns the one-letter code of an amino acid as a string.
    let aminoAcidToOneLetter =
        function
        | Alanine -> "A"
        | Arginine -> "R"
        | Asparagine -> "N"
        | AsparticAcid -> "D"
        | Cysteine -> "C"
        | Glutamine -> "Q"
        | GlutamicAcid -> "E"
        | Glycine -> "G"
        | Histidine -> "H"
        | Isoleucine -> "I"
        | Leucine -> "L"
        | Lysine -> "K"
        | Methionine -> "M"
        | Phenylalanine -> "F"
        | Proline -> "P"
        | Serine -> "S"
        | Threonine -> "T"
        | Tryptophan -> "W"
        | Tyrosine -> "Y"
        | Valine -> "V"

    /// What a single codon signals.
    type Signal =
        | StartOrAmino of AminoAcid // codon that may act as a start signal or code for the amino acid
        | Stop // stop codon
        | Amino of AminoAcid // codon that only codes for the amino acid

    /// Looks up the signal of an RNA codon.
    /// This is the single codon table of the module; `dnaCodonToSignal` is derived from it.
    /// A wildcard in third position means all four bases give the same result.
    let rnaCodonToSignal =
        function
        // 1st base U
        | (RNA.U, RNA.U, (RNA.U | RNA.C)) -> Amino Phenylalanine
        | (RNA.U, RNA.U, RNA.A) -> Amino Leucine
        | (RNA.U, RNA.U, RNA.G) -> StartOrAmino Leucine
        | (RNA.U, RNA.C, _) -> Amino Serine
        | (RNA.U, RNA.A, (RNA.U | RNA.C)) -> Amino Tyrosine
        | (RNA.U, RNA.A, (RNA.A | RNA.G)) -> Stop
        | (RNA.U, RNA.G, (RNA.U | RNA.C)) -> Amino Cysteine
        | (RNA.U, RNA.G, RNA.A) -> Stop
        | (RNA.U, RNA.G, RNA.G) -> Amino Tryptophan
        // 1st base C
        | (RNA.C, RNA.U, _) -> Amino Leucine
        | (RNA.C, RNA.C, _) -> Amino Proline
        | (RNA.C, RNA.A, (RNA.U | RNA.C)) -> Amino Histidine
        | (RNA.C, RNA.A, (RNA.A | RNA.G)) -> Amino Glutamine
        | (RNA.C, RNA.G, _) -> Amino Arginine
        // 1st base A
        | (RNA.A, RNA.U, (RNA.U | RNA.C | RNA.A)) -> Amino Isoleucine
        | (RNA.A, RNA.U, RNA.G) -> StartOrAmino Methionine
        | (RNA.A, RNA.C, _) -> Amino Threonine
        | (RNA.A, RNA.A, (RNA.U | RNA.C)) -> Amino Asparagine
        | (RNA.A, RNA.A, (RNA.A | RNA.G)) -> Amino Lysine
        | (RNA.A, RNA.G, (RNA.U | RNA.C)) -> Amino Serine
        | (RNA.A, RNA.G, (RNA.A | RNA.G)) -> Amino Arginine
        // 1st base G
        | (RNA.G, RNA.U, (RNA.U | RNA.C | RNA.A)) -> Amino Valine
        | (RNA.G, RNA.U, RNA.G) -> StartOrAmino Valine
        | (RNA.G, RNA.C, _) -> Amino Alanine
        | (RNA.G, RNA.A, (RNA.U | RNA.C)) -> Amino AsparticAcid
        | (RNA.G, RNA.A, (RNA.A | RNA.G)) -> Amino GlutamicAcid
        | (RNA.G, RNA.G, _) -> Amino Glycine

    /// Rewrites a DNA base as the RNA base with the same letter, with U standing in for T.
    /// Note this is NOT complementation (compare `Transcription.dnaToRna`).
    let private codingBaseToRna =
        function
        | DNA.A -> RNA.A
        | DNA.C -> RNA.C
        | DNA.G -> RNA.G
        | DNA.T -> RNA.U

    /// Looks up the signal of a DNA codon.
    /// The result is that of the RNA codon spelled with the same letters (T read as U),
    /// so both lookups share one table and cannot drift apart.
    let dnaCodonToSignal (codon: DNA.Codon) =
        let (first, second, third) = codon
        rnaCodonToSignal (codingBaseToRna first, codingBaseToRna second, codingBaseToRna third)

    /// Maps every DNA codon of `codons` to its signal, keeping the original order.
    let dnaCodonsToSignals codons = Seq.map dnaCodonToSignal codons

    /// Maps every RNA codon of `codons` to its signal, keeping the original order.
    let rnaCodonsToSignals codons = Seq.map rnaCodonToSignal codons

    /// Splits an RNA base sequence into codons (see `seqToCodons`) and maps each to its signal.
    let rnaSeqToSignals rna =
        let codons = seqToCodons rna
        rnaCodonsToSignals codons
module Dogma =

    /// Runs the whole pipeline on a DNA base sequence:
    /// transcribes it to RNA, then translates the RNA into codon signals.
    let dogma dnaSeq =
        let rna = Transcription.dnaSeqToRnaSeq dnaSeq
        Translation.rnaSeqToSignals rna
module PrettyPrint =
    open System.Text
    open Translation

    /// Renders a sequence of signals as one string: each amino acid becomes its
    /// one-letter code and each stop signal becomes the literal "-STOP-".
    let signalsToString (signals: seq<Translation.Signal>) =
        let render signal =
            match signal with
            | StartOrAmino acid
            | Amino acid -> aminoAcidToOneLetter acid
            | Stop -> "-STOP-"

        let buffer = StringBuilder()

        for signal in signals do
            buffer.Append(render signal) |> ignore

        buffer.ToString()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment