Skip to content

Instantly share code, notes, and snippets.

@drdozer
Created March 1, 2017 01:08
Show Gist options
  • Save drdozer/5595c41b9b9e122f91dc27a0ee1d12ff to your computer and use it in GitHub Desktop.
Save drdozer/5595c41b9b9e122f91dc27a0ee1d12ff to your computer and use it in GitHub Desktop.
A small Scala Ammonite (amm) script that generates perfect telomeric repeats and then mutates them.
import scala.util.Random
/** The nucleotide alphabet; `randomNuc` draws a random letter from this string. */
val DNA: String = "AGCT"

/** The repeat unit that `telSeq` concatenates to build a perfect telomere. */
val telRep: String = "TTAGG"

/** The codon that `codonsToORF` treats as the beginning of an open reading frame. */
val startCodon: String = "ATG"

/** The codons that `codonsToORF` treats as terminating an open reading frame. */
val stopCodons: Set[String] = Set("TAG", "TAA", "TGA")

/**
 * Builds a perfect telomere sequence.
 *
 * @param reps how many copies of `telRep` to concatenate
 * @return `telRep` repeated `reps` times; the empty string when `reps` is zero or negative
 */
def telSeq(reps: Int): String =
  List.fill(reps)(telRep).mkString
/**
 * A codon together with where it sits on the DNA string it was read from.
 *
 * As filled in by `dnaToCodons`, positions are 1-based and the range is half-open.
 *
 * @param beginsAt   position of the codon's first nucleotide
 * @param endsBefore position immediately after the codon's last nucleotide
 * @param codon      the codon's nucleotide letters
 */
case class CodonAt(
  beginsAt: Int,
  endsBefore: Int,
  codon: String
)

/**
 * An open reading frame found by `codonsToORF`.
 *
 * @param beginsAt   `beginsAt` of the ORF's start codon
 * @param endsBefore `endsBefore` of the ORF's last codon (the stop codon when there is one)
 * @param terminates true when a stop codon was found, false when the codons ran out first
 * @param codons     the codons making up the ORF, including the stop codon when present
 */
case class OrfAt(
  beginsAt: Int,
  endsBefore: Int,
  terminates: Boolean,
  codons: Seq[CodonAt]
)
/**
 * Maps a single upper-case nucleotide to its complementary base (A <-> T, G <-> C).
 *
 * Only one character is handled here, so no reversal takes place in this function.
 *
 * @param nuc one of 'A', 'G', 'C' or 'T'
 * @return the complementary nucleotide
 * @throws MatchError for any other character (including lower-case letters)
 */
def reverseComplementN(nuc: Char): Char =
  if (nuc == 'A') 'T'
  else if (nuc == 'T') 'A'
  else if (nuc == 'G') 'C'
  else if (nuc == 'C') 'G'
  else throw new MatchError(nuc) // same failure a non-exhaustive `match` produces
/**
 * Splits `dna` into consecutive, non-overlapping codons for one reading frame.
 *
 * @param dna   the nucleotide string to read
 * @param phase the reading frame, 1 to 3; the first `phase - 1` nucleotides are skipped
 * @return every complete three-letter codon of that frame, in order. Coordinates are 1-based
 *         and half-open (`beginsAt` inclusive, `endsBefore` exclusive). A trailing group
 *         shorter than three letters is dropped.
 * @throws MatchError if `phase` is outside 1..3
 */
def dnaToCodons(dna: String, phase: Int): Seq[CodonAt] = phase match {
  case p if p >= 1 && p <= 3 =>
    // Materialize with `.toList` rather than `.to[Seq]`: the latter was removed in Scala 2.13,
    // while `.toList` compiles on 2.12, 2.13 and 3 and yields the same `List` as before.
    dna
      .drop(p - 1)
      .grouped(3)
      .zipWithIndex
      .collect { case (triplet, index) if triplet.length == 3 =>
        val start = index * 3 + p
        CodonAt(start, start + 3, triplet)
      }
      .toList
}
/**
 * Finds every open reading frame in a sequence of in-frame codons.
 *
 * One ORF is reported for each occurrence of `startCodon`, so a start codon that lies inside
 * an earlier ORF produces its own (nested) ORF as well.
 *
 * @param codons codons of a single reading frame, in order
 * @return one `OrfAt` per start codon, in order of appearance. An ORF runs up to and
 *         including the first codon in `stopCodons` (`terminates = true`), or to the last
 *         codon when no stop codon follows (`terminates = false`).
 */
def codonsToORF(codons: Seq[CodonAt]): Seq[OrfAt] =
  // `.toList` replaces `.to[Seq]`, which no longer exists in Scala 2.13+; on 2.12 both
  // produce a `List`.
  codons.tails.collect {
    case fromStart @ (start +: _) if start.codon == startCodon =>
      // `coding` is never empty: it begins with the start codon, which is not a stop codon.
      val (coding, fromStop) = fromStart.span(c => !stopCodons(c.codon))
      fromStop.headOption match {
        case Some(stop) =>
          OrfAt(start.beginsAt, stop.endsBefore, terminates = true, coding :+ stop)
        case None =>
          OrfAt(start.beginsAt, coding.last.endsBefore, terminates = false, coding)
      }
  }.toList
/**
 * Draws one nucleotide letter from `DNA`.
 *
 * The bound is taken from `DNA.length` instead of a literal 4, so the function keeps
 * covering the whole alphabet if `DNA` is ever edited.
 *
 * @param random the random source to draw from
 * @return a character of `DNA` at a randomly chosen index
 */
def randomNuc(random: Random): Char =
  DNA.charAt(random.nextInt(DNA.length))
/**
 * Picks a replacement nucleotide that is guaranteed to differ from `nuc`.
 *
 * The earlier version re-rolled only once when it drew `nuc` itself, so the second draw
 * could still return `nuc` and leave the base unchanged. Drawing from the letters of `DNA`
 * other than `nuc` removes that case.
 *
 * @param nuc    the nucleotide being replaced
 * @param random the random source to draw from
 * @return a randomly chosen letter of `DNA` that is not equal to `nuc`
 */
def nucMutate(nuc: Char, random: Random): Char = {
  val alternatives = DNA.filter(_ != nuc)
  alternatives.charAt(random.nextInt(alternatives.length))
}
/**
 * Applies one substitution to `dna` at a randomly chosen position.
 *
 * @param dna    the sequence to mutate
 * @param random the random source; used first for the position, then by `nucMutate`
 * @return a string of the same length with the chosen position replaced by the result of
 *         `nucMutate`; an empty `dna` is returned unchanged
 */
def substitutionMutate(dna: String, random: Random): String =
  if (dna.isEmpty) dna // nothing to mutate, and `nextInt(0)` would throw
  else {
    val mutateAt = random.nextInt(dna.length)
    dna.updated(mutateAt, nucMutate(dna.charAt(mutateAt), random))
  }
/**
 * Applies `substitutionMutate` to `dna` `n` times in succession.
 *
 * Each round picks its position independently, so the same position may be hit more than once.
 * A negative `n` previously skipped the `n == 0` base case and kept recursing; it is now
 * treated like zero.
 *
 * @param n      number of substitution rounds; zero or negative returns `dna` unchanged
 * @param dna    the starting sequence
 * @param random the random source passed to every round
 * @return the sequence after `n` rounds of substitution
 */
def substitutionMutateN(n: Int, dna: String, random: Random): String =
  (0 until n).foldLeft(dna) { (current, _) => substitutionMutate(current, random) }
// Build a perfect telomere of 20 repeats and report its codons and ORFs in all three frames.
val myTel = telSeq(20)
println(s"Got telomere: $myTel")

// Reading frame 1
val phase1 = dnaToCodons(myTel, 1)
val phase1Orfs = codonsToORF(phase1)

// Reading frame 2
val phase2 = dnaToCodons(myTel, 2)
val phase2Orfs = codonsToORF(phase2)

// Reading frame 3
val phase3 = dnaToCodons(myTel, 3)
val phase3Orfs = codonsToORF(phase3)

// Report every frame's codons first, then every frame's ORFs.
List(
  s"Phase 1 codons: $phase1",
  s"Phase 2 codons: $phase2",
  s"Phase 3 codons: $phase3",
  s"Phase 1 orfs: $phase1Orfs",
  s"Phase 2 orfs: $phase2Orfs",
  s"Phase 3 orfs: $phase3Orfs"
).foreach(println)
// Mutate the perfect telomere 50 times independently (4 substitutions each) and report any
// ORFs that appear in each of the three reading frames.
val rand = new Random()
// A plain `for` loop: the body only prints, so there is nothing worth collecting with `yield`.
for (_ <- 0 until 50) {
  val mutated = substitutionMutateN(4, myTel, rand)
  println(s"Got telomere: $mutated")
  for (phase <- 1 to 3) {
    val orfs = codonsToORF(dnaToCodons(mutated, phase))
    // Frames without any ORF are left out of the report.
    if (orfs.nonEmpty) println(s"Phase $phase orfs: $orfs")
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment