Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus
Last active December 15, 2015 08:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sir-deenicus/5230432 to your computer and use it in GitHub Desktop.
Save sir-deenicus/5230432 to your computer and use it in GitHub Desktop.
open System
open Prelude
open System.Text.RegularExpressions
open System.IO
open System.Windows
let sw = Diagnostics.Stopwatch()
/////////////////////////////////////////////////////////////
/// Counts how often each character occurs in `s`.
/// Returns (character, occurrences as float) pairs ordered by character.
let countsymbols (s : string) =
    s
    |> Seq.countBy id
    |> Seq.map (fun (symbol, occurrences) -> symbol, float occurrences)
    |> Seq.sortBy fst
String.Join(" ",
countsymbols "ACCACATCATTGTATTATCAATATTGCTCCTAGGACAGCCTTAGGATTTGGCCTGAGGTAGCTCAATAGAGGTATGATCCATCCTAGTAAATGATCTGATCTAGAATGGGTCTCTTGTCTAACCTGTCGCTCCGCAGTTAGTATTACGCATAACCCTTACGAAGAATTGGTCCGGCTTAACGTAAACTCTTTCCACTTCGCGACTGTGTTATGAACAGTAGTGCCATGATGGGGGAATGGCCCTCATCGGTCTGCATATCTCAGGAGAATACCCGCGAGCGTTATGAGGGGGGAGTTCGTCTATCCGCACCTCTCGGTGCGGGTTTGAGGTAGGCAGAGCCGGGTGCGGGCGCGCGATTCACGCCCTTTTTGACTATGGGCGTGAGAAGGGAAAATGCCCAAACTAGTTACAGGCGAAAACTCCTATACGTATTAATTTATACCTACGGGAATGGTAGAATTATTACACGTTAGCCGCTTGGATTGACGCCAAAGGCGATTAGGCGTCCCCGCATGATCCGGGCATTAAAGTGTTAAGTCACACGGGCGATGGGATCCCCGTCCTTCTGTACTCCCGTAATCGGTAGTTGTCCACCTTGGATCGCCGAATGCTGAACATTGACAAACCTACTTCGATACTAAACATAATCTATACGCTTTATTTCCCGCATTGTGTTTTTCGGATGATTATTCGCGCGCATTTCAGAACGCATCTGCGCGCAGCTTTGGGGGCAATAAATCGAGTTGATAAGCGGGCAGGTGTGAAACGTTGGGCCCAACCTCTATGAAATATACCCGCGGATAATTGACGAGACTTGGATCCATTGCATATGTCTCTTTCGCACCGTACAGAGTGTATATTCGTGGGCAAACGATATGTTCTGTCCAAATTCGCTTTATTCGACGTTCCAGGCGATGATCAGGCCATAACAATAAGGTCGTGTTCAGCGGCCAACTCCGGCCTGGA"
|> Seq.map snd)
/////////////////////////////////////////////////////////////
/// Transcribes a DNA string into RNA by substituting every 'T' with 'U'.
let transcribe (s:string) = s.Replace('T', 'U')
transcribe "GATGGAACTTGACTACGTAAATT"
transcribe "TGTGGGGTAGCCAGTGGATGCTCCGAAATATCAAGAAGCTGCGGCTACGCGTTTGGAAATACGCAATGGCTGGCGCAGGAGTCTTCATAAATGTGCTCTGTTCTAATTCGAGTCCTCCGGCCTGCAAACAGAAATCATAAAGATACCTTCATAATACTGTTGCAACTGCCCCTGGAACCCCAGGGAATGACACGACAATTTTCGCCGTCATGCGTGTAGTGGGAAGTTCTGTCCGTTGGCGTTCCGCATGGAGAGGCTTTCGTCCCACGCTCTAGTTTAACACGCTGCACACAGAGAGCCAACCTTTTGAAGCAGGCCACGTATGCCACCCGTTCATAGTCAACGTCACTCAAGTTGGTGATGAAACCAGTTTCCCGAAGGTAGCCAGCTTCTCGGGAATAACGTGTGCTTGGGTCGTAAAAACGACCTTGGAGTAATCTGTCTACACTGGTGACCTGGAAATCAGAAGTAGTAACACTACGATAGTGCGCTACAATGAATTCTAAGTGTCCCCGGTGGTGTGTAAGCTGGCACAACATGTGGCGGGCAGTTTCAATATTGAGAGGGTCATAATACGCCGGTTGCGTCAGGCTATTTAGCACCATGCCGGGCGAAACGAGGCGTATCGCCAGGGCGAACCGGCCCAACCACAGGTTTTAAATAAAAACGTTTATCAAAACAGGGGGGCGACTAATCCCAGCTAAGGTGGGCGAAATGACTGTAAATTTCGAGTGTTTCCAACCTCAACGTACTAGATATGACTAACAAGACAACCTAATGGCAACGCGCGCTAGGTCAATTTAGGACCGCCTGAGAGGCAGAGCCACCACCCAATCTATAGTATGGCTTCAAGGTCGCTTTTTTAGGCGGGCGGATCACCGCCTAGGACCATCTGGGTAAATATCATGCTGGTATTAGGACAATCCATAATAACCCCGGGTTTCGTCGGAGTTTGAACGGTTTGCCGCAGTCACATCCGGGCCCATAATAGGGG"
/////////////////////////////////////////////////////////////
/// Reverse complement of a DNA string: the string is reversed and each of
/// A/T and C/G is swapped with its partner. Any other character becomes a space.
let rev_complement (s : string) =
    let complement nucleotide =
        match nucleotide with
        | 'A' -> 'T'
        | 'T' -> 'A'
        | 'C' -> 'G'
        | 'G' -> 'C'
        | _ -> ' '
    let flipped = s |> Array.ofSeq |> Array.rev |> Array.map complement
    String(flipped)
rev_complement "TTGTCTGAGGAACCCTTTGCTGCAAGAAAGGTACAGCGCAATAATATAGCTCGTATTGCCGCAAGCCCAGCAGCAGCCTACCAAGGCCTGTAGAAGTATGCGTAAGCATGTTCGCTATCGAAGTTCCAAGTCCGCGAAAGCGAGAGGTAGCGCTCTCTGCGAATTGGTGCGACAGAGTTTTTCTTTCATTTGATCGTCAAAAGCATTGCGGTCCGCACAAAGCATAACTCCTCACAGATTAGATTACGGTGTTTTTCGAATAGCCATACGCTGAATGCCGCACTTTGTCTTTCGCCCCGGCGTAACTCGCAGCACAGCTCATGCTTATTACCGCTCCGTGATGACTTTACCCAACTTATTAAACGTAGTGTATTCATCTTATCCTACGAGATGTGCGCAGGGTCCGGCACTAGACCTTGGGGGAGGACTGTTTAGGCTTTTGGTCTTCGTAATAGCGTATCGGTACCCGCCCGCTGTATGACCGCTTGCAACACTACTCACACCCCATCTAACTCTTTCGCCCTAGGGAGGCCGAAGGAAGTACCACGCGCTTTGCCCCCTGGGAAGCTAAAGAGGCGAAAGGTTAAGTTTTATTTCCCCATTAGTGGTGCCGACCCCGCATAAGGCCATTTAGGTCTCGGGACACACTAAAAGACATCTCTGGTTACCATTTATGCTTAAGCTCGCCATGCGAACGGCTGGATCCGCGACTGAGGGCCTCTCCTAACCCCGTTCAGAGCGTAAAAGTCCCCCAGATTGTGACAGGTCTCTCCGGACATAAATCCAGAAACGGATGACGTAAAGTTGAGGGTGGAGG"
/////////////////////////////////////////////////////////////
/// Passes both arguments through Prelude.charArr and hands the results to
/// HelperFunctions.hamming (presumably the Hamming distance — both helpers live outside this file).
let hamm a b =
    let left = Prelude.charArr a
    let right = Prelude.charArr b
    HelperFunctions.hamming left right
hamm "TCTAAGTATCCCCTCACCCGACGATACAGGGAGGCATAAAGGTATCTAACCTAATTGACCGGTTCTCTACGCCTGGGTCAGCCTGCTCCGGTACCGTGGATTTCGCTCTTCCAATCGGTCAAGATACCCATATGTTAGGACCCCCTTCTCTAGGACCCTTTGTTCTTTTTAGCGGGGTTTATGGAAGAGAATTAAGTGTCCTGGGGTTAGGTGCCGTCGCTACTAGTAAGGTACATGAAGAGTCGCGATTCCACCGGTTTTGCGACGACGCAACACGTGGCCTTACTGTCTAGATAGGCATAGAACTTACTTATGAAAACCTTGTCGCGTGCCCTCTATCTGGCCAGGGGATAGCTGTGAAGGTCCTCGACGGTCGGGGCCTTTGTATGCTCATGTAAAAGACCTAGCGGTTATCAATCACTCCCTACTCACCCACTAGCTAATCCGGCTATGTTCGACCTCAGAGACCTCTTCCCAGTGTGTTTTAAACCACCGGTCAGTGAAGGGCACGACGATCATCGGCGGCTCCTTAAACACCCCCTCGTCGACCCACAGAAATAGTACTATATTATGTTACCGACAGCCCGAATGAGCTCCCCCACTCCGGAATGCTATCGTTTGGCGGAATCGGTAAATTCATTAAAACGTCTTTTTAATTCCAGCACCCTCATCTGCCGTGCCGCTTGCAGTGTAGGAGCATCATCTTCAGTCACCGATTCTGTCCACCACCAGGCCGCTACAGAAGTCCCACCCGCCGGCTATTCATCGCCAATGTCCAGTCCCCCCAGTCCTCATTAAATGTATTTTACTGGAACGAGCCAGCATATCAAGGCTAGACTAGTGGACACCGAACGCAATGTGACCTTAGCTACCATGCATTGCCGGCAACGTATCAGTTCGTTTATGATTCGTCACTGTGGATTGTGTATCATCGGATTAAGGTCGGCCTAATATGCTAATTAAC"
"CAAAAGGTTTGCTCCACATGTCGACAAAGGGCGAAAACGAAGTTTCTATGCCGAGTGGTCCGTCGTTTCCTTCGTGTCCAGCCTGCCCCGGGAATGAGGGATCCTCTCGTCGAGTGAGTCTTAGTGCCGCACAGCGACCAGCTACCTCTCTAACACGATGTAGTAATTTTTTAAATGTCTATGCTGCATAGCTGAATGCCGTAGGGTTAGAGGCGGCCACTAAATGTGCAATCCCGCGAGTGAACCGGATTAAAGGGTCTCATGCGACCGCTGGAGGAGGTTGGAGTATCATAGAAGTCAAGGTTGATACATATTTGAAAGTGCTCACGTCCCTTGTATGTTGTCATGGACTGGCCTTGCAGTGTCTCTACGGCTGGAGCGTTTCTTCGTTGTTGTGGAGAACGCAGTATTCGCCAATCGCCACATAATCACAGAAGCTCTCATCGTGCAAGTTTTGCCCTGGCTGACTCCTTCAAATTGCGCTATTCACTACCGGTGAGCGTATAACACAGACTTGATCCGGGCCGACTAAAACACTCGTCCCTTCAAACACAAAGGGAGGCTTGTAATTGCTTGACGAAGCCGCCAAACTGTCTAAGCGCTTTATCGAAGACCCTTGGTGCGTAATAATCGTCTGCACCCGTCGATCTAATTAAGCGAAGCGCCCTCACCGGTGGTGTCCTTCGCGAGTTAGGATCAGGATGTAGATTTATCGCTCTGGTCTCCCCAACAGGAGATACAATATTCTATCCTAACGGCTACTGATCGGCAAGCCGGTTCCCCCACATTCTGGCTTAACTGCCATTTCCTGGCATTTACCTGAAGAGCCAAGCAAAACTAGTGTTCACGATTGGAAATGGTGTATTACGCAACATAAACTCGCGGACAACAATCCCTTAATACAAGAGCTTTCAGTGTAGATGGTCTATCATTGCGTTGAAGTCCACCTAATCTGCCTACTAAC"
/////////////////////////////////////////////////////////////
// From: http://stackoverflow.com/questions/286427/calculating-permutations-in-f
// Much faster than anything else I've tested
/// Every list obtained by inserting `x` at one position of the given list,
/// starting with the insertion at the front.
let rec insertions x lst =
    match lst with
    | [] -> [ [ x ] ]
    | head :: tail ->
        let deeper = insertions x tail |> List.map (fun rest -> head :: rest)
        (x :: lst) :: deeper

/// Lazily enumerates all permutations of the given list.
let rec permutations lst =
    match lst with
    | [] -> seq [ [] ]
    | head :: tail -> permutations tail |> Seq.collect (insertions head)
/// Renders all permutations of 1..n as text: the first line is the number of
/// permutations, followed by one space-separated permutation per line.
/// Every line, including the last, ends with "\n".
let perms n =
    // Materialise once: the lazy sequence would otherwise be recomputed for
    // the count and again for the rendering.
    let pers = permutations [1..n] |> Seq.toArray
    let lines =
        Seq.append
            (Seq.singleton (string pers.Length))
            (pers |> Seq.map (fun perm -> String.Join(" ", perm)))
    // Single join instead of repeated string concatenation inside a fold.
    String.Join("\n", lines) + "\n"
File.WriteAllText(dir + "perms.txt", perms 7)
/////////////////////////////////////////////////////////////
let str = @">Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT"
/// Computes the GC fraction of every record in a FASTA-formatted string.
/// Returns an array of (header, gc fraction) pairs, where the header is the text
/// before the first line break that follows a digit.
/// Relies on `countsymbols` from this file and `sumMap` from outside it
/// (presumably the sum of the map's values).
let gc_content (s:string) =
    // A sequence may legitimately contain no G or no C at all; treat a missing key as zero.
    let countOf symbol (counts : Map<char, float>) =
        defaultArg (Map.tryFind symbol counts) 0.
    let dnas = s.Split('>')
    dnas.[1..]
    |> Array.map (fun dna ->
        // Accept both LF and CRLF after the header.
        let dat = Regex.Split(dna, @"(?<=\d)\r?\n")
        let sqs =
            Regex.Replace(dat.[1], @"\s", "")
            |> countsymbols
            |> Map.ofSeq
        dat.[0], (countOf 'G' sqs + countOf 'C' sqs) / sumMap sqs)
/// Formats the record with the highest GC fraction as "<header>\n<percentage>%".
let topgc s =
    gc_content s
    |> Array.maxBy snd
    |> fun (label, ratio) -> sprintf "%s\n%f%%" label (ratio * 100.)
topgc str
/////////////////////////////////////////////////////////////
/// Evaluates 2 * ((gc/2)^2 + ((1-gc)/2)^2) for the given GC fraction.
let pfromGc gc =
    let gcTerm = (gc / 2.) ** 2.
    let atTerm = ((1. - gc) / 2.) ** 2.
    2. * (gcTerm + atTerm)
String.Join(" ", "0.000 0.066 0.182 0.199 0.272 0.369 0.413 0.466 0.519 0.572 0.658 0.703 0.801 0.830 0.894 1.000".Split(' ')
|> Array.map (float >> pfromGc))
/////////////////////////////////////////////////
// Lookup table from RNA codon (e.g. "AUG") to its amino-acid letter or "Stop".
// The verbatim string is cut into entries by `splitstr` (defined outside this file);
// each entry is then trimmed and split on a single space into (codon, amino acid).
// NOTE(review): the column separator in the splitstr call looks like it was originally
// wider than one space and got collapsed in this copy — confirm against the original gist.
let rnacodonTable =
splitstr [|"\n"; " "|] @"UUU F CUU L AUU I GUU V
UUC F CUC L AUC I GUC V
UUA L CUA L AUA I GUA V
UUG L CUG L AUG M GUG V
UCU S CCU P ACU T GCU A
UCC S CCC P ACC T GCC A
UCA S CCA P ACA T GCA A
UCG S CCG P ACG T GCG A
UAU Y CAU H AAU N GAU D
UAC Y CAC H AAC N GAC D
UAA Stop CAA Q AAA K GAA E
UAG Stop CAG Q AAG K GAG E
UGU C CGU R AGU S GGU G
UGC C CGC R AGC S GGC G
UGA Stop CGA R AGA R GGA G
UGG W CGG R AGG R GGG G"
// Each entry "CODON X" becomes the pair (CODON, X).
|> Array.map (fun (str:string) -> let code = str.Trim().Split(' ') in code.[0], code.[1])
|> Map.ofArray
/// Translates an RNA string codon by codon through `rnacodonTable` and
/// concatenates the results ("Stop" codons appear literally as "Stop").
/// Codons start at offsets 0, 3, 6, ... up to rna.Length - 4, so when the
/// length is a multiple of three the final codon is not translated.
let translate (rna : string) =
    seq { 0 .. 3 .. rna.Length - 4 }
    |> Seq.map (fun offset -> rnacodonTable.[rna.Substring(offset, 3)])
    |> String.concat ""
translate "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"
translate (File.ReadAllText(dir+"rosalind_prot.txt"))
///////////////////////////////////////////////////////
/// Finds every (possibly overlapping) occurrence of `sub` in `s`.
/// Returns the 1-based start positions in ascending order.
/// An empty (or null) pattern yields no positions.
let motifs (s:string) (sub : string) =
    if String.IsNullOrEmpty sub then
        // An empty pattern "matches" everywhere and previously ran the search
        // start past the end of the string, which throws.
        []
    else
        let rec findall output (i:int) =
            if i > s.Length - sub.Length then output
            else
                // Ordinal comparison: sequence data must be matched byte-for-byte,
                // not by culture-sensitive rules.
                let pos = s.IndexOf(sub, i, StringComparison.Ordinal)
                if pos = -1 then output
                else findall (pos + 1 :: output) (pos + 1)
        findall [] 0 |> List.rev
String.Join(" ", motifs
"CTTTAAACGACATGACCCCAACGACATTGAACGACATCAGCCGTCTCAACGACATTACGGCAACGACAAAACGACATCTCCTGCGAAACGACAAACGACAATAACGACACAACGACATCAACGACAAACGACACAAACGACAGAACGACAGAACGACACGATATTAAACGACAGGAAACGACATAACGACAGCGTCCTAGTTTACAGAACGACAAGAGTCGATTCAACGACATTAACGACACGAACGACATACTAAGACTAACGACAAACGACATAAACGACAGTTGTCAAACGACAAACGACAGACATCATAAACGACACTAACGACACGTAAGAACGACAAACAACGACACAAACGACAAACGACAGGTTAACGACATAGAACGACAAAACGACATCTTAACGACATAACGACACATTAACGACAATTCATGAACGACAATAATACTAACGACAAACGACACCAACGACATAACGACAAACGACATCTATACAACAACGACAGCAGCTAAACGACATTAACGACATAACGACAAACGACATAAACGACATAACGACATCCTAACGACAGTAACGACACCAACGACATCCTTGCAACGACACGTGCAACGACACTGAAACGACAAACGACATTCCTTGAACGACAAACGACAAACGACATCAACGACACCAACGACAAACGACAAACGACAAACGACATCGCAACGACAAACGACAAACGACATAACGACATTAAAAGGAACGACAACGCAACGACAGAACGACAAAACGACAGGAAACGACAGAACGACACCGCAGGATGCCGATAATGCCGGCAACGACAAACGACAAACGACACAACGACA"
"AACGACAAA")
////////////////
/// Computes (pfromGc gc)^n * (m - n + 1), rounded to `r` decimal places.
let motifProb (r:int) n m gc =
    let perPosition = pfromGc gc ** n
    let positions = m - n + 1.
    Math.Round(perPosition * positions, r)
let dat = "0.000 0.105 0.165 0.219 0.257 0.310 0.388 0.412 0.465 0.518 0.576 0.640 0.680 0.747 0.789 0.854 0.921 1.000".Split(' ') |> Array.map float
String.Join(" ", dat |> Array.map (motifProb 3 8. 9093.))
(*
Walk through half of the string from both ends at once,
appending to a left string (lstr) and prepending to a right string (rstr). Compare lstr to rstr;
if they are equal, set a flag. If they are not equal, continue.
If the flag was set and the next character breaks the equality, stop; otherwise continue.
*)
/// Grows a prefix and a suffix of `str` one character at a time and compares them.
/// Stops at the first mismatch that follows a match (returning the previous length),
/// or once index str.Length / 2 - 1 is reached (returning the current length when
/// prefix and suffix agree there, otherwise 0).
let borderLen (str:string) =
    let rec findfail matchedBefore prefix suffix i =
        let prefix' = prefix + string str.[i]
        let suffix' = string str.[str.Length - 1 - i] + suffix
        let matchesNow = (prefix' = suffix')
        if matchedBefore && not matchesNow then
            prefix'.Length - 1
        elif i >= str.Length / 2 - 1 then
            if matchesNow then prefix'.Length else 0
        else
            findfail matchesNow prefix' suffix' (i + 1)
    findfail false "" "" 0
/// Builds the prefix ("failure") table of `s`: element k is the length of the
/// longest proper prefix of s.[0..k] that is also a suffix of it.
let failureArray (s:string) =
    // table.[0] = -1 acts as the sentinel that terminates the fallback chain.
    let table = Array.zeroCreate<int> (s.Length + 1)
    table.[0] <- -1
    let rec extend pos candidate =
        if candidate = -1 then 0
        elif s.[candidate] = s.[pos] then candidate + 1
        else extend pos table.[candidate]
    for k = 1 to s.Length do
        table.[k] <- extend (k - 1) table.[k - 1]
    table.[1..]
"CAGTAAGCAGGGACTG" |> failureArray
let str1 = File.ReadAllText(dir+"rosalind_kmp.txt").Trim()
let res = String.Join ( " " ,failureArray str1)
File.WriteAllText(dir+"roskmpout.txt", res)
///////////////////////////////
let sqstr = @"GATTACA
TAGACCA
ATACA"
let dats = sqstr.Split('\n')
let dats2 = [|"crayon"; "raygun"; "crayfish";"crapton"|]
let dats3 = [| "pinging"; "dinging"|]
/// Folds HelperFunctions.longestCommonSubstring over all strings: starts from the
/// result for the first two strings, then for each further string replaces the
/// current set by the union of the results for every member against that string.
/// Requires at least two strings.
let commonSubstrs (strings : string []) =
    let start = HelperFunctions.longestCommonSubstring strings.[0] strings.[1]
    let narrow candidates curword =
        candidates
        |> Set.map (fun w -> HelperFunctions.longestCommonSubstring w curword)
        |> Set.unionMany
    Array.fold narrow start strings.[2..]
let str1 = File.ReadAllText(dir+"rosalind_lcs.txt").Trim()
let dats = str1.Split('\n')
let lcs = commonSubstrs dats
///////////
////////////////////////////////////////////////
/// Lazily enumerates the Cartesian product of a list of collections: every list
/// formed by picking one element from each collection, in order.
let rec listPermutations choices =
    match choices with
    | [] -> Seq.singleton []
    | first :: remaining ->
        seq {
            for pick in first do
                yield! listPermutations remaining |> Seq.map (fun tail -> pick :: tail)
        }
/// All strings of length `n` over the alphabet in `alpharaw` (spaces are ignored),
/// sorted by the position of each character within that alphabet.
let lexic n (alpharaw:string) =
    let alpha = alpharaw.Replace(" ", "")
    // Rank of each symbol = its index in the alphabet.
    let rank = alpha |> Seq.mapi (fun i c -> c, i) |> Map.ofSeq
    let sortKey (word : string) = word |> Seq.map (fun c -> rank.[c]) |> Array.ofSeq
    listPermutations [ for _ in 0 .. n - 1 -> alpha ]
    |> Seq.toArray
    |> Array.map (fun cl -> String.Join("", cl))
    |> Array.sortBy sortKey
let res = String.Join("\n", (lexic 3 "J T Y G D N"))
/// All strings of length k..n over the alphabet in `alpharaw` (spaces are ignored),
/// sorted by the list of alphabet positions of their characters.
let lexic2 k n (alpharaw:string) =
    let alpha = alpharaw.Replace(" ", "")
    // Rank of each symbol = its index in the alphabet.
    let rank = alpha |> Seq.mapi (fun i c -> c, i) |> Map.ofSeq
    // List keys (rather than arrays) are used for the comparison here.
    let sortKey (word : string) = word |> Seq.map (fun c -> rank.[c]) |> List.ofSeq
    seq { for m in k .. n do
            yield! listPermutations [ for _ in 0 .. m - 1 -> alpha ] }
    |> Seq.toArray
    |> Array.map (fun cl -> String.Join("", cl))
    |> Array.sortBy sortKey
let res2 = String.Join("\n", (lexic2 1 4 "Q W U L P H X I S C G"))
Clipboard.SetText(res2)
//////
/// Removes every occurrence of each intron from `dna`, processing the introns
/// in array order, and returns what is left.
let codeexons (dna : string) (introns: string[]) =
    let mutable spliced = dna
    for intron in introns do
        spliced <- spliced.Replace(intron, "")
    spliced
let dna = "ATGTGCGAACAAAATGGCGTAAAAGTCAAGGTTGTCGGCTCAGAGTGTCTATAACATTATCAAGTGATGCCGGATATTTGAATCTCGCAGCACCAGCAAGAATAATGGTTTTAGAGAATGAGACCTTGACGGCGCGGAATAGAGGGGTGACCCCCTCCATAGTCGTTCCAACAACGGGAATCATACTAGAGTCCTACCACGACGCCAATATTAACTCGATCCGGGGGCACGGACCATATGCAACGTCTCCGCCAATTCCCGCCAAGCTCAAGAGGGTAACCATGGTCCAGGAAGCGTCCTGGAACACCCAGCATCGAAGGTTGGAGCAGTGGAGGTTCACTTATACTAACAGACTCGGAGTGATAAAAGTCGACTGCGGTATTCTGGTATATACATCGCCTAGGTAGTGTTTTGATTCACCACCTTGAGGGAATCCGGATGTTCCGCGACCTCACGATTCTCAAATTAGAAACAATAGATCAGCAACGACTGTAGACACCGTGGTTGATATACAAAGGCTCTTAAGCTGCAAGCCTAAAACGTGTGTGTCCCGGCTTACAGGGACCTGGGCCCAGGCATGAGAAGATACAGAAGGCTAAGAGACCGGGAGCTTGACTTGGCTGCAAGTAGCCAAAGTGACTCCTCAACGTGTATACACTAGATGCAGTCGAATTGGGCCGCGGTACCGGGGGGGAAACACAATCCGAAATACTTCATCGAACCTTTACATATTTGATCAGAGACGCAGTGGATTCCCTGGTCTACGATTCATTGTCGGTATCCGAAATGGTCCCTACCAACTGGGCATCCACTACGTTTACATGACTGGTCTTTGGGCTCGTGCCAAGAACCGGAATAGGTATAGATGCGCTACGCTCCACGTGAAAGCACGTGGTATATCTGGAGTGAGCACCTCGCCCACGTCCGGAACGCTGCCGCATAA"
let introns = @"CTTTACATATTTGA
AGCTGCAAGC
TTAGAGAATGAGACCTTGACGGCGCGGAATAGAGGGGTGACCCCCTCCAT
GCTACGCTCCACGTGAAAGCACGTGG
CTAACAGACTCGGAGTGATAAAAGTCGACTGCGGTATTCTGGTAT
CACTACGTTTACATGACTGGTCTTTGGGCTCGT
ACTAGATGCAGTCGAATTGGGCCGCGGTACCGGGGGGGAAACACA
AAGGCTAAGAGACC
TCAGAGTGTCTATAACATTATCAAGTGATGCCGGA
CCCTGGTCTACGATTC
CGCGACCTCACGATTCTCAAATTAGA
CTACCACGACGCCA
TCAAGAGGGTAACCATGGTCCAGGAAG".Split('\n')
let res2 = (dna, introns) ||> codeexons |> transcribe |> translate
/////////////////////
// Lookup table from a one-letter amino-acid code to a floating-point mass value.
// Each line of the verbatim string is split by `splitstr` (defined outside this file)
// into a letter and a number, which are converted with `char` and `float`.
// NOTE(review): the values look like monoisotopic residue masses — confirm the source.
let mmasstable = splitstr [|"\n"|] @"A 71.03711
C 103.00919
D 115.02694
E 129.04259
F 147.06841
G 57.02146
H 137.05891
I 113.08406
K 128.09496
L 113.08406
M 131.04049
N 114.04293
P 97.05276
Q 128.05858
R 156.10111
S 87.03203
T 101.04768
V 99.06841
W 186.07931
Y 163.06333" |> Array.map (fun s -> let spl = splitstr [|" "|] s in char spl.[0], float spl.[1]) |> Map.ofArray
/// Mass of a single amino-acid letter according to `mmasstable`.
let lookupmass c = Map.find c mmasstable

/// Total mass of a protein string: the sum of the table masses of its letters.
let proteinmass (protein:string) =
    protein |> Seq.fold (fun total residue -> lookupmass residue + total) 0.
File.ReadAllText(dir+"rosalind_prtm.txt").Trim() |> proteinmass
/////////////////
/// Turns a permutation written as an array into a map from 1-based position to value.
let topermMap (ps : int []) =
    ps |> Seq.mapi (fun idx image -> idx + 1, image) |> Map.ofSeq

/// Inverts a permutation map by exchanging keys and values.
let inversePerm (p) =
    p |> Map.toSeq |> Seq.map (fun (position, image) -> image, position) |> Map.ofSeq

/// Composition of permutation maps: (a <*> b) sends i to a.[b.[i]] for i in 1..a.Count.
let (<*>) (a:Map<int, int>) (b:Map<int,int>) =
    Map.ofList [ for i in 1 .. a.Count -> i, a.[b.[i]] ]

/// The values of a permutation map, ordered by key.
let permAsline a = [| for KeyValue(_, image) in a -> image |]
/// Counts the breakpoints of a permutation `p` of 1..n.
/// The permutation is framed with 0 on the left and n+1 on the right; every
/// neighbouring pair whose values do not differ by exactly 1 is a breakpoint.
/// Returns (number of breakpoints, p.Length).
let breakPoints (p : int []) =
    let n = p.Length
    // Value at position i of the framed permutation 0, p.[0], ..., p.[n-1], n+1.
    let framed i =
        if i = 0 then 0
        elif i = n + 1 then n + 1
        else p.[i - 1]
    // All n+1 adjacencies are inspected, including the one between the first
    // and second elements, which the previous implementation never looked at.
    let count =
        seq { 0 .. n }
        |> Seq.filter (fun i -> abs (framed (i + 1) - framed i) <> 1)
        |> Seq.length
    count, n
/// Reverses the segment workarr.[i..j] (inclusive) and returns the new array
/// together with its breakpoint count (first component of `breakPoints`).
/// Works for arrays of any length; previously the last index was fixed at 9.
let permuted (workarr:int[]) i j =
    let left = Array.sub workarr 0 i
    let mid = Array.sub workarr i (j - i + 1) |> Array.rev
    let right = Array.sub workarr (j + 1) (workarr.Length - j - 1)
    let n = Array.concat [left; mid; right]
    let score, _ = breakPoints n
    n, score
/// For a fixed start index `i`, tries every reversal cnarr.[i..i+w] for the given
/// width offset `w` and all larger ones that still fit inside the array.
/// `best` is (lowest score so far, arrays achieving it): a tie adds the new array,
/// a strictly lower score replaces the list. Returns the updated `best`.
/// The upper bound is derived from the array length (previously fixed at 9).
let rec search (cnarr : int []) best i = function
    | w when w <= (cnarr.Length - 1 - i) ->
        let obest, a = best
        let na, sc = permuted cnarr i (i + w)
        let best' =
            if sc = obest then (sc, na :: a)
            elif sc < obest then (sc, [na])
            else best
        search cnarr best' i (w + 1)
    | _ -> best
/// Runs `search` for every start index of `cnarr` (all but the last position) with
/// initial width offset 1, and returns the accumulated (best score, arrays) pair.
/// The start-index bound is derived from the array length (previously fixed at 8).
let computePerm (cnarr : int []) =
    let rec intervs b = function
        | i when i <= cnarr.Length - 2 -> intervs (search cnarr b i 1) (i + 1)
        | _ -> b
    // Seed: worst possible score with a placeholder empty array.
    intervs (Int32.MaxValue, [[||]]) 0
// Chooses one array out of the candidate list `arrs` and returns it as (depth, array).
// `isroot` only matters for a single-candidate list; `depth` is threaded through and
// incremented on each recursive call. Scores come from `computePerm`, which yields
// (score, candidate arrays) for an array.
// NOTE(review): indentation was lost in this copy. The second top-level `| x ->` arm
// (the one starting with `let sc = x |> List.map`) presumably belongs to the outer
// `match arrs with` — confirm against the original layout.
let rec pickDepth isroot depth (arrs : int [] list) =
match arrs with
// Exactly one candidate: at the root it is returned directly with depth 0.
| [x] -> if isroot then 0, x
else let sc, lst = computePerm x
// A score of 0 ends the search with the first array computePerm returned.
if sc = 0 then
depth, lst.Head
else match lst with
| [x] -> depth, lst.Head
| x -> pickDepth false (depth + 1) lst
// Any other candidate list: score every candidate via computePerm.
| x -> let sc = x |> List.map (fun d -> computePerm d, d)
// tsc = lowest score among the candidates, zz = the arrays computePerm returned for it.
let tsc, zz = sc |> List.minBy (fst >> fst) |> fst
if tsc = 0 then
depth, zz.Head
// Otherwise keep the candidates whose score is <= tsc; a single survivor is returned,
// several survivors are explored recursively and the result with the smallest depth wins.
else let nn = match (sc |> List.filter (fun z -> z |> fst |> fst <= tsc )) with
| [p] -> depth, p |> fst |> snd |> List.head
| lsf -> lsf |> List.map (fun ((_,d),_) -> pickDepth false (depth + 1) d)
|> List.minBy fst
nn
/// Repeatedly replaces `narr` by the array chosen by computePerm/pickDepth until
/// HelperFunctions.hamming reports no difference from the target `t`.
/// Each round adds 1 plus the depth returned by pickDepth to `count`; when the
/// target is reached both arrays are printed and the count is returned.
let rec searchall t narr count =
    match HelperFunctions.hamming t narr with
    | 0 ->
        printfn "%A" narr
        printfn "%A" t
        count
    | _ ->
        let _, bestArrs = computePerm narr
        let ta, topn = pickDepth true 1 bestArrs
        searchall t topn (count + 1 + ta)
let alls = @"9 3 10 7 2 5 6 4 1 8
2 9 10 8 4 7 5 6 3 1".Split([|"\n"|] , StringSplitOptions.RemoveEmptyEntries) |> Array.map (fun (s:string) -> s.Split(' ') |> Array.map int)
sw.Restart()
let xuse = [| for i in 0..0 do
let pi = ((alls.[i + 1] |> topermMap |> inversePerm) <*> (alls.[i] |> topermMap)) |> permAsline
yield searchall [|1..10|] pi 0 |]
sw.Stop()
sw.Elapsed
// original depth search rev order min a b
//rr2 8 6 8 6 5 -> 8 5 8 5 5 -> 5 7 8 4 5 5 5 8 4 5 5 5 7 3 5
//rr 7 5 6 8 4 -> 6 5 6 8 4 -> 7 5 7 8 7 6 5 6 8 4 6 5 6 7 4
//site 9 4 5 7 0 -> 9 4 5 7 0 -> 9 4 6 7 0 9 4 5 7 0 9 4 5 7 0
//rr5 N/A N/A N/A 7 7 5 8 5 7 6 5 7 5
/////////////////////////////
/// Maps 0, 1, 2, 3 to "A", "C", "G", "T"; any other integer gives the empty string.
let intToCodons idx =
    let letters = [| "A"; "C"; "G"; "T" |]
    if idx >= 0 && idx < letters.Length then letters.[idx] else ""
/// Builds the profile matrix and a consensus for equally long DNA strings.
/// Returns (consensus characters, [| A counts; C counts; G counts; T counts |]).
/// The column width is taken from the first string. On a tie the first letter
/// in the order A, G, C, T wins.
let consensus (dnas:string []) =
    let width = dnas.[0].Length
    // Candidate order matters: List.maxBy keeps the first of several maxima.
    let tallies =
        [ 'A'; 'G'; 'C'; 'T' ]
        |> List.map (fun letter -> letter, Array.zeroCreate<int> width)
    let byLetter = Map.ofList tallies
    for dna in dnas do
        dna |> Seq.iteri (fun col letter ->
            let tally = byLetter.[letter]
            tally.[col] <- tally.[col] + 1)
    let winners =
        List.init width (fun col ->
            tallies |> List.maxBy (fun (_, tally) -> tally.[col]) |> fst)
    winners, [| byLetter.['A']; byLetter.['C']; byLetter.['G']; byLetter.['T'] |]
let d = File.ReadAllLines(dir+"rosalind_cons.txt")
let a,b = d |> consensus
Clipboard.SetText(String.Join("", a))
String.Join("\n", [for i in 0..3 -> intToCodons i + ": " + String.Join (" ", b.[i])]) |> Clipboard.SetText
/////////////////////////
/// Parses a FASTA-formatted string into an array of (header, sequence) pairs.
/// The header is the first line of each record (without the leading '>'); the
/// sequence is the rest of the record with all whitespace removed.
/// The text before the first '>' is printed, as before.
let readFASTA (s:string) =
    let dnas = s.Split('>')
    printfn "%A" dnas.[0]
    dnas.[1..]
    |> Array.map (fun record ->
        // Split at the first line break instead of a regex character class that
        // also matched '|' and required the header to end in a digit.
        let eol = record.IndexOfAny [| '\r'; '\n' |]
        if eol < 0 then
            // Header without any sequence lines.
            record, ""
        else
            record.Substring(0, eol), Regex.Replace(record.Substring eol, @"\s", ""))
/// Builds the directed overlap edges of order `k` for labelled strings.
/// For every unordered pair (each element with the elements after it) an edge
/// (idA, idB) is recorded when the last k characters of A equal the first k of B.
/// Edge payloads are (-1, len A - k) when A precedes B in `data` and
/// (1, len A - k) when A follows B. Both directions are reported if both overlap.
/// Returns (edges in reverse discovery order, number of strings).
let overlapGraph k (data : (string * string) []) =
    // True when the k-suffix of `a` equals the k-prefix of `b`; strings shorter
    // than k can never overlap.
    let overlaps (a : string) (b : string) =
        a.Length >= k && b.Length >= k
        && String.CompareOrdinal(a, a.Length - k, b, 0, k) = 0
    data
    |> Array.fold (fun (edgelist, i) (id, dna) ->
        let edgelist' =
            data.[i + 1..]
            |> Array.fold (fun acc (idComp, dnaComp) ->
                let acc =
                    if overlaps dna dnaComp then ((id, idComp), (-1, dna.Length - k)) :: acc
                    else acc
                // Checked independently: an `elif` here would hide the reverse edge
                // whenever the forward one exists.
                if overlaps dnaComp dna then ((idComp, id), (1, dnaComp.Length - k)) :: acc
                else acc) edgelist
        edgelist', i + 1) ([], 0)
let dset = @">Rosalind_0498
AAATAAA
>Rosalind_2391
AAATTTT
>Rosalind_2323
TTTTCCC
>Rosalind_0442
AAATCCC
>Rosalind_5013
GGGTGGG" |> readFASTA
let dnastrs = File.ReadAllText(dir+"rosalind_grph.txt").Trim()
let dset = dnastrs |> readFASTA
/// Renders an edge list as "source target" strings, in the reverse of the given order.
let dispList (elist : ((string * string)*(int*int)) list) =
    // Consing while folding left-to-right yields the reversed order directly.
    elist
    |> List.fold (fun lines ((source, target), _) -> (source + " " + target) :: lines) []
String.Join("\n",overlapGraph 3 dset |> fst |> dispList) |> Clipboard.SetText
/////////////////////
/// Number of entries of `rnacodonTable` per key produced by `keyValueToValue`
/// (defined outside this file; presumably the amino-acid value of each entry).
let inverseRNATable =
    rnacodonTable
    |> Seq.countBy keyValueToValue
    |> Map.ofSeq
/// Multiplies the `inverseRNATable` counts of all letters of `protein`, reducing
/// modulo `n` after each letter; the running product starts at the count for "Stop".
let inferRNA n (protein : string) =
    let mutable count = inverseRNATable.["Stop"]
    for letter in protein do
        count <- (inverseRNATable.[string letter] * count) % n
    count
"MQTIINFIPCHKGAGDVMEAPDIPNTNETSVPKVDLYTLGPRQDNSGREGPNKRFRVHTWRFMISFHMMMKQPKCLWNVHDLHHFWSHAENFKMRETITSCDYPADVGNNRLAREHKWTFTGGTHMYLCRKCSTQKYYLEASEDKRAAKITMCIIYLTIVVTFFTSGNRCTVHWSFFLYEATHYHPIAPWLQVSINCYGCAGWCNRQIVICIGRLFINKNCHNGLRHPIAHENMYQQAASEAVDFYLMMMDRWEDNGWWVTSIQIIPELDRRPYVSHVWYCWLCCDTCWWKHKNRNPRKNNKASAKFNTGNTKNPLMNVMKSNFWMVTQRAISTHNYNWEWGEDRYCVCKTTPFRRSNKQDRGNRDIVAEWKHTEYNPWKQMPHKKWVWRFRDCHGYRRSNHCQMFSLSMWIELPVMLDTPMDMIDVCGGNKRLFPPRVWWWLMTSGMGQSQGMMCRRMPDHCSRFTYGLKHWEPSAFMWKAEHWKGHSSLFDLFLAKPRGAVYQDIVEYMHWPYIKISEGKWWTGPIKEYDSNQLAVFVLDPWSFMEQKEDKMVKQGTKCQSNRTGWGFVCVWNLKEWQPGWETWQPQLFTMAKMHGSNHNCQQPGVKTCCNKCSRLHIHPKLVFFDPYLQSYMGNPKVCNPYCQVPKRNQEDKWYCGMQLHDHTRSRSRMLDDVIWSSVYAKHTWKCVMLYMPPGYKVLESGLDMSMDESHSACPLHQNQPFVNQFQHYGRWGHWVNNFQHPELGIFMWIRRNSWHNQHWELVECHFDKVPQRHFFADVTKAYQSGCVGMVMRNSSMTWAKKRLFFRAIQDRLNMAISCWKSVPEHQNMGNFPWTWAGVSMPKTTLMVWVVGMMQEQFMFTADHMYLTAQAGPKFRDNNCTVWHNADFDHTRGDSWTMLALEIKEVIGEGDIEYSRYGDNTPPVNHSNEEFFWMTRQGMIVGDPILTFGIDQKEYMHEEWHERWIHQTQYFIPDCCQTNARCEIQCDTIRMFPWTY"
|> inferRNA 1000000
////////////////////
/// Joins the elements of `a` into one string, separated by `sep` (String.Join).
let joinstr (sep:string) (a:'a seq) =
    String.Join<'a>(sep, a)
/// Greedily matches `comp` as a subsequence of `dna`, scanning left to right.
/// Returns the 1-based positions of the matched characters, most recent first;
/// the list is shorter than `comp` when `dna` runs out before all of it matched.
let splicedMotif (dna:string) (comp:string) =
    let mutable indices = []
    let mutable matched = 0
    let mutable cursor = 0
    while matched < comp.Length && cursor < dna.Length do
        if dna.[cursor] = comp.[matched] then
            indices <- (cursor + 1) :: indices
            matched <- matched + 1
        cursor <- cursor + 1
    indices
splicedMotif "ACGTACGTGACG" "GTA" |> List.rev |> joinstr " "
let s = "CCCTCGTGACATGTACGGCACAACCCTATTTGTCTTTGAGGGGAAAGTTACACTTTGTCCAAAACAGGTGCGAAATGATGACACAACGGTGGTGAAATGCGTTAATAGAGAGGGCAAACGGTGTAACAGCAACGTGGCGCTTAGTACTTGCGTGAGCGCACCCCTACCGAACTACGATCTATTGGGGGGCAATGTCCCCTTCTTTGTCTCCACGCAAACTTGGGGCTCGAACGCCTACCCCGCGTGCGCCTGGCTTTGCGGCAGCGACTCCTCACTGGGTTAGGTAACCCATTTAATTGGTTCATATGGTCCTCCGCCAGCCCGCTATTTACGTAAACCCATTCGAGTCAGCATCGTGCGGAGGAATCGATTCTAAAGGCAGGATCCTTACGACGGTGAACTGTCAGGCGAGGGATTTCTGTAGGTCTATCGAGTGAGTACCGTTATCTCTTAAAGCACCTTACGGCCCAGAGGCCCGAACACGGACATCCATGATGGTAGGGCGGTGCCCAGTGACAGACTATGGAAAACATTATAACTGGCACTACTTTCGAAGAGGTCGAGACTGAGCAGTCATTTGTTATAAGATCTGCTCTTAACATGCGGATCTGGGCAGGCCGTACTGAAGACCTTGACTTGCCATAAGCGCGCCACTGAAGTTATGGTACACAAGTTTCAGCCACAATGAAAAAACCGGGGGGGCCACCCCAGAAATTCCCGTCGGGCAAAGTGTGAACATAAGTACTCACATGTAGGTGTGTAACAAGATGGGGTGACTGAATGTTTCCCGTATGTTGCGTAAGATCTAAGCTGGTACTAGCACAACTGTACGAAGGTCAGGTACTAGCACAACATGGTTCTTGCTGGGCTGGAATCCCGAGTCGCTACCAATTGAAACGTACATGTGG"
let t = "GATTAAAAGACCCACAGTCCGGTCTGATATCAGAAGGTCACGGCACTTTTTAAGGCGATTTTTGCAATTAAG"
splicedMotif s t |> List.rev |> joinstr " "
///////////////////////////////////////
splitstr [|"TAG"; "TAA"; "TGA"|]
let dna1 = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG"
let dna11 = "CGGATTTGTAGAACTCACAGGGCGCTCTTTACAGGATGTGCCGGTATATGCATACCCTAGATGAAGTCATCTCCCACCGATCGCGTAGCAGCTCGGTGGCAAAACATTAAGCCAGTATGAACCAACACGACTGCATCTTACTTCTCTTGCTTGACGTATTTATTGAAAGTGAAATGTCACTAGCAGGGATTCGTCCTTACTCAGCCTGGCTGGCAACGACTCCCTTCCATTAGCTTCTGCAGAGAAGTACTGAGCTGTTCGCTCCAAGCAGCATGGTGGGACAAACCCTTGATTGGGCGTGGATGGCGAGCCCGATTACAAAGGGATGGGTTTTCTGTGTGCTTATCAAATCTCCAATATCCTCAGCGGCGCTGAAGGTCTATACACCCAGAGACTAACGTGTTAACTACAACCGGCTATTCTAATGGGTATACCGGCCCGTTTGCCTTTCAATGAAGGGGCAAATAGTTCACGTGAGTGGCGCATAGCTATGCGCCACTCACGTGAACTATTTGCCCCTTCATACGCCCCTTAGCCTCGCTGATTCACTGACGGCCGTGGTGACTGCAGCGGAAGTTTTAGGGGGACTACTTGCTGGGCCTTTATTAGCGGGTTCGCTGGGATAGCCATCGTCGATTCGACCATCGGCCCTGAGCGGTGTGTCGCCGCGGGATCCAGACTGAGGGTTGCTTTGCGAATTTGTCAACATGCATCTCTTGGAGCTTACGTACACCGAGTGGAGAGGACCTACCGCGATAATGCATGACTTGCACATCTTCTTGCACATGACTAGGTGCCTACGCCTCCCGGATCGTTACTCGGATGGACTCTCTTAGCTCACCCTGTATAGTGGTGTAGCTGAGCACCGAGCTTATGCGGTGAAATCGCGCGCTTTGGTTGATCGAACCTACTGGTTTATTCATTACCGGACTACCTGGTCCTTGCTCCCGAGCACATGTCCCTGGCTAAACCCACC"
/// Collects candidate protein strings from `rna`: finds the next "AUG", translates
/// from there with `translate`, and keeps the text before the first "Stop".
/// Continues three characters past that "AUG"; stops as soon as no "AUG" is left or
/// a translation contains no "Stop". New results are prepended to `flist`.
let rec frames flist (rna:string) =
    match rna.IndexOf("AUG") with
    | -1 -> flist
    | start ->
        let prot = rna.[start..] |> translate
        match prot.IndexOf("Stop") with
        | -1 -> flist
        | stop -> frames (prot.[0..stop - 1] :: flist) rna.[start + 3..]
let f1 = dna11 |> transcribe |> frames [] |> set
let f2 = dna11 |> rev_complement |> transcribe |> frames [] |> set
String.Join("\n", Set.union f1 f2) |> Clipboard.SetText
////////////
/// Produces every signed variant of `permutation`: all 2^len ways of negating a
/// subset of its elements. Variants are ordered by their sign pattern read as a
/// binary number whose most significant bit belongs to the first element
/// (0 = keep, 1 = negate). An empty list yields a single empty variant.
let signPermutation (permutation : int list) =
    let len = permutation.Length
    let calc = 1 <<< len
    [ for mask in 0 .. calc - 1 ->
        permutation
        |> List.mapi (fun j k ->
            // Bit (len - 1 - j) of the mask decides the sign of element j; testing
            // bits directly avoids formatting padded binary strings, which failed
            // for an empty list.
            if ((mask >>> (len - 1 - j)) &&& 1) = 1 then -k else k) ]
let t = permutations [1..3] |> Seq.collect signPermutation |> Array.ofSeq
String.Join("\n", t |> Array.map (fun s -> String.Join(" ",s))) |> Clipboard.SetText
/////////////////
let dnastr = "TACTGCGCGCGTTTACGATCTTTCTTTCAGCCGCGTGGTGGCAGGTCTCCACTCCTATCACGTTACAAATTTCTATTCGCTCTACTACATCATGTCAGGTTCGTCTTACACGTACTCCGACCTTTGGTACTCCCGTAGCCCCCCACCCGTCTCACACGTCTTTTGAAATTTTACGATCTGGACTGTTTGTTTATTACACATCGCTCGGAGCTAGTGCATACCTGAATCGTACGTAGTGGGGCGACCCTGGCCCTTACCACCTTGCGGGGTGCAAGGCCAACACAACGCCGGGCCGATGGGCCTTCCTGACATGCGCGCAATGTGGAGTTCGTCTAAAAAACCCTGACTTCAACGAAGTCCTTGCATCAGTTAGTTTTCCTGGAGAAGAAACCGTAATAGATGCTGAATTTACTGGAAAGCCCACCTGGCCCTCCATGTCACTGTACGTTATGTAGCTAGCCCCCAGGGGCCCAAGTCGTTGCAACTACCGATAGGACGCGTTATCTTCAGTGAAGACCGCTTCAGTGCGACCTTGATCTCTCAGTGTACCGAGTTGCACCTAACACTGACGAGGCGTTATCGTTCATTGCGGGACTCTATCCTGGGTTATTAATAAGCGCATGGACTGAAAAGTCACGTCACTGATAGTCGTCTGGTCATACACATATATCGATTCGAAACGGCAATTGAATCTCAACGCATTTAACATTAGCCCTCCAGCTGCGCGCAACCGGTACTTGACGTTTATAAGCGTGGTGTAGTGCTGGCTCGGTGACAGCATCGGCCAAGACTAGGCGTATATCATGCAGATCGACACCCCATACCATTGCAGCACCATCAATTATAGCGGGGTTTGAGTTCGCTATGTGTGTCATAGGCATGTTTGTTATCTCTTGTGCCTGGCTACATTGGTCTAGGCTACAAGAGAGCCCCACTGTGGTAACGACTGGTATGCTACAGCG"
/// (1-based start, length) of every substring of `dnastr` with length 4..8 that
/// equals its own reverse complement.
let dnapal =
    [ for start in 0 .. dnastr.Length - 1 do
        for extra in 3 .. 7 do
            if start + extra < dnastr.Length then
                let window = dnastr.Substring(start, extra + 1)
                if window = rev_complement window then
                    yield (start + 1, extra + 1) ]
String.Join("\n", dnapal |> List.map (fun (a,b) -> string a + " " + string b)) |> Clipboard.SetText
/////////
let subsqA = "GCGCAAGAATTGTCAAAATTGCCAGTCACCGTACGCCCGATCGCCAGGCCGTCTAACTCTCACATCACCAGGGGGAGCCAGGTGAAAATTCCACCAGAATTTTCGAGATCCTCTAACAGGTCGTTCTCTTACACAATTCGGTCGATGACCAGTTGCGCTTAGTGCAGTTTTGCGAATAACTCGAGCTAAGAATGGGGACCAGTGGAAAAAAGCGTCTTAATGCGCGTTCTGGCGGGAATGAGAAGTCAGCTGACAGCACACCTCGCGCGTGTCTTGCAATTCAGTCTCATTACCCCTCCGCTTAGTCTCATACTTCAACATGCTGCGATTGATAGCTTTTTGAGCATCACGTATGAGTATGTGGCTGACAAAGCTATATTGTTAGAAAGCACCACCGGTCAAATTAAATAAGTATTCCAGAGTAGACCCAACATTGCGAGCCCGCTATATCATCGTGATCGACTCATACGAGAACCGGGCGAGTTTACTTAACCCATTTGGCCATGTGTTCATGGATTGATACGATCTTGATAACGGGCCGATTCCGTCGGAGTTGTAGGTCCCACACTGGTGCGGACTGCCATCTGTGTGTTCTGATCGTTACAATATACCTGTTCACCACAAGGAACCTTCAATTGGTTTGCTCGATTTATGATAATAACGCTTTTCTGAATACGTTCGTTTCCAGTTGGGGCCTCTGACGGATTGGGCACTGGGGAGCGGCGACCGCGACTTTCAGCGAGAGACCGAAGCTGCCACTTTCGAGAAGAGATAGCATGTGAGCGCGCGGGAGGAGAATGACATCTACGCTCTTCCTCATGAAGTGGTGCCGAGGTCAGCAGCTTACGCGCACCGA"
let subsqB = "ATTTGCTCGCAGTACGAGAGATTTTTTTACTAAACCTAAGCAACCCAGAAAAGTAGGCTTACTCATAGAGGGATGAGTAGCATGTCGCAATTGCGGGCCATCTCCGTCAATCGGCGAATCGGCAACGCGTGAAAGGGTGTGTAGGAACCCACTTCTATAAACAAGTAGTGCTTCTGACCATACAATCAACAATGGTAGAGAACGATAAGAGCTCAGGGCGTCCATGCTTAGTTAATGGGGTCTGAGCGCTGAAAGATGCTGGTGCTCTAAACACCTCTGCGCTTTTTAGCTGTAACTTCAGCATACACTTGAGTTGAACACTATGGCGTTAGTGCTAAAACTTAATTCAAGGTATCAGTAACCGAGTGTGCTCGGAAAGGAGGGTCCCGCCTTGTCTCCTAGCACTCAGACCAATTTATTTTTTAGGGACCTGAAGACAGCATTGCCTCGATTGCCACTGGAGAAGCCGGATTGTAGGTCCTTTACCGCTGAAGAAAGTGATAAGTTAAACAGCTCATAAACCGGAGCTCATGTTACGAAGCGGGCAGCGCTATTATTTTAGAGGCGATCCTCCACCCTTTGAATCGCTTTTCCAACCGGGCTCACATATGCTCCCCAGTTTTCTGACTCAGACTCACATGGCCCAGTAGGTAATATGGTGTATCAGTTACGCACTCGCCGACGTGGTTGCTCGCCATTATTTTCAACCCGTATAAAGCGCCGCTGCTCCATATCGACAGTGGTGCGTAGATGTATGCGCAGGCAGTTATTGCCGATTCTGGCCACCAGAAAGTCGCTAGTCTCAACTCGACAGTTTGAGAGGAGAAACACTCGTATAAAGTAATTGACGTGGGCCGCAACTCAAAGG"
let ln ,i ,j, d = HelperFunctions.longestCommonSubSeq subsqA subsqB
let t = HelperFunctions.backtrackLCS d subsqA subsqB i j
t |> Clipboard.SetText
///////////
let levStr = "WTDVFDHESYMIWVFCYGSMCLWFMHVRILGDGHWWVEEGRGKCLFCYMRSTIVDFKFRGGFHCGAWQGDERTMVTQQLPGRDAGNWMFLDEQPSYQTGSRGRRQCTMKPQCMGESMVSIINQICGIAWIHIHGRYGQTGRRDVGAGYDCTFHRNAYWLRGGNQHCGFNGINNKLKCVHVQVSVGPKDFPLKESSPVVEAPVMAMLKRYWLLKRSLRDDNETDAHMKKNVQDAAMPITRETKKTWQGVFSQPEMGWWWNMEPKMGECNVKIYIMLIKIVFFESFYHARPVHSSFAMISYPRLENHPRGCYFHHFFEIWWGGGHENEDRSQSRMQGDWTTGLRSDDTGMDDLRQFTGGNPCSTHQMDNCYSGAKMNPHWGCRKMRTIFMFFFEFMYYAHHTCVIVMDNDSTALILSFGWEDLELGFYWRDFSSDRDDCTQCMGENKVEWAWKGRIVKVFSRVKWLYMILCGSNEIVTWAGIPRHMVKYFLMVSCGAVRCADDRPKAWLLWYVDLVTMLPITAKLLDLMHETWFDRINYVESYKCFITRYQILDSINHLTHYVKFHCLHFFEAVDQFLVAVSLQGWKYHVLWKPLIHDYVKDLTKQGIPVKYKKLDNLQIEYNNIEYTMYIWNQLNSDVLNLGNFCGERRRGLWASNFCSCPQRRAKVGRCWTFPSWVCSGNMRLLVKMYMFNHAYINESIYCLETDMPENVRIIHWVMTKFWFYEMCCQPYCTAMPVYTQNYTEKKRCHNIECWDFNPYGQYVIVVWCTVVRNAEVPKMPASESLQEDFYKLVSIVTQNGCKHDYYEKGNNFVRTVEYVKFPQPQMAVSITEVICVWYFQPGQERCTYMVTIECEELYVNCCWFANGLDYGPLHINAHEMCKIAATMIVQRTDPRQCWQAHRNFNMMIVWGHQNWLRGLAPCDGQELVFKAGCTWQAVDQGQTALSWNTQLTELTGTFCDWGHRRTCEDIRNK"
let levstr2 = "YVNLRQVRAEVNQFYMVYQEVNISYLTVSTQQTMNHIPYCTHYPIWCGYNTADFYCWGTHHKKEFGMQKARKYGGDKMGLHLVYMGVIDRSASLINIKMINGRLWEEWEEELRDVSQFLWQMQGLVNSVKKYKTRKRPEWCAYTGRKWPNTKANGTFTPSRHQPLIARMDYQMFLNQQTTEYKWRCYPFQCSTKCGYLIWIYKQMEMWFRMNPLQYCEETFEEQQIAGFCLGVRWVNHDDEIYMEGPPWEPTNNIVKMYWETALPLWPTYDKWYHAPNFREYIANAWTSIGWNIFIAAPCVDYQYTVFKWLQELEAYETVSLMKLTYIQETLVHVDEETTFRCAMAKMNAAIFLRLKFCTEALESKEQMKSDCLWPDLVDQKIKADTHNLYDAWVRPAKNKKEKGEDNKSAVHMLTASMRISVNETDHAECKVGFLSMLYMENNTQNAMQFQNWDRAVKPFELWPMCRWKPHADNCMLTIKTIGDDGCNQISANPDRYQCECHELHTPTKRRRQDPSSKEHQFTNVMAYSVLFRMDRCHLEMGMWLFRHDNEPVARSMEVHYRHDVGIQQGNVLPEPMGARCMWPCTDSPYYTVWPTISKMILWMEKICPWGKKFEATHYVRDKNRIEGAMPCMDDYMVVWSKNCITQGIDKLADPTGKQINLMCNWQEGVMKNGCKQEHNWDVRMTSYLVHTCPYCPTLTCCGQHKGRLDCFIMFYRLPKYQINRVSSEIMPQQGMISVCVYCGFYKFLVFGLECACWDFTMAVCQWSPTTAQIHSSCDEMYMQVHRFFIHMEKMGRKENQMDYICQQYDCMNQCMQLQRRKDPVQFNWDLSSTHIIPGRENIEKLCKEQYHPTKVFANFCGMDNCIAGQHDWQLIRFYCSAPYKQMSAFKWMKHMLIKNNVLNYPFKFAPETRITPLMWWENKPCAVQLVSKHDSGNFGLPVYTCRGCVLDRQFMRCQNTIRWCTDSSI"
HelperFunctions.LevenshteinDistance levStr levstr2
/////////////
let kmers = lexic 4 "A C G T"
let dnaF = @">Rosalind_6431
CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG
CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT
TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA
AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG
GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA
CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA
CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG" |> readFASTA |> Seq.head |> snd
let dnaF2 = File.ReadAllText(dir+"rosalind_kmer.txt").Trim()
let dnaStrF2 = (dnaF2 |> readFASTA).[0] |> snd
String.Join(" ",kmers |> Array.map (motifs dnaStrF2 >> List.length)) |> Clipboard.SetText
/////////////////////////
let sr = @"ATTAGACCTG
CCTGCCGGAA
AGACCTGCCG
GCCGGAATAC".Split('\n') |> Array.mapi (fun i s -> string i, s) |> Map.ofArray
/// Last component of a 4-tuple.
let fourth (_, _, _, last) = last

/// First component of a 4-tuple.
let fst4 (first, _, _, _) = first
// Heuristic search for a short superstring of the strings stored in `sSet`
// (a map from a string key to a sequence). Returns (length, merged string).
// Debug output is written with printfn along the way.
let rec seekSuper (sSet:Map<string,string>) =
let s = sSet |> Map.toArray
// Overlap edges for every overlap length from 1 up to (length of the first string - 2),
// ordered by the second number of the edge payload that overlapGraph attaches.
let ovl =[for i in 1..(snd s.[0]).Length - 2 -> overlapGraph i s |> fst]
|> List.concat
|> List.sortBy (snd >> snd)
// Keep the edges whose payload value does not exceed that of the first (smallest) edge,
// and merge each pair: the first `sc` characters of e1's string followed by all of e2's.
// NOTE(review): `int e1` / `int e2` index into the array `s`, which assumes the map keys
// are the strings' positions — confirm this still holds after keys have been removed.
let mivl = ovl |> List.filter (fun (_,(_,s)) -> s <= (ovl.Head |> snd |> snd))
|> List.map (fun ((e1, e2),(_,sc)) ->
let so = (snd s.[int e1]).[0..sc - 1] + (snd s.[int e2])
printfn "%A" (sc,so)
sc,e1,e2, so)
match mivl with
// A single candidate is returned directly.
| [a] -> (fourth a).Length, fourth a
// Several candidates: recurse on the map without e1 and e2 and keep the shortest result.
// NOTE(review): the merged string `str` is printed but not added to the map passed to
// the recursive call — confirm whether that is intended.
| os -> os |> List.map (fun (_, e1,e2,str) ->
printfn "%A" str
let nmap = Map.remove e1 sSet |> Map.remove e2
seekSuper nmap)
|> List.minBy fst
seekSuper sr
//[("3", "GCCGGAATAC"); ("1", "CCTGCCGGAA"); ("2", "AGACCTGCCG");
// ("0", "ATTAGACCTG")]
// ATTAGACCTG AGACCTGCCG
// ATTAGACCTGCCG CCTGCCGGAA
//
// ATTAGACCTGCCGGAATAC
// ATTAGACCTGCCGGAATAC
///////////////////////////
/// A discrete distribution: each outcome maps to (raw count, probability).
type RandomVariable<'a when 'a : comparison> = Map<'a, float * float>

/// Builds a distribution from (outcome, count) pairs; each probability is the
/// outcome's count divided by the sum of all counts.
let makeDist (items: ('a * float) []) : RandomVariable<'a> =
    let total = Array.sumBy snd items
    items
    |> Array.map (fun (outcome, count) -> outcome, (count, count / total))
    |> Map.ofArray
/// Recomputes the probability of every entry as count / sum, where the sum is
/// obtained from `sumMapGen` (defined outside this file) applied to the counts.
let normalize dist =
    let total = sumMapGen (fun op (count,_) -> op count) dist
    dist
    |> Map.map (fun _ entry ->
        let count = fst entry
        count, count / total)
/// Which kind of factor a homozygous organism carries two copies of.
type Dominance = Dominant | Recessive
/// Genotype of an organism: two equal factors (with their kind) or one of each.
type Allele = Homozygous of Dominance | Heterozygous
let pops = [|("k",Homozygous(Dominant)), 21096.; ("m", Heterozygous) , 27320.;
("l",Homozygous Recessive),33757.|] |> makeDist
/// Removes one unit of `thing` from the distribution `ps`.
/// Returns the original (count, probability) entry of `thing` together with the
/// re-normalized distribution; `thing` is dropped entirely when its count reaches zero.
let takeOne thing ps =
    let item, rest = Map.partition (fun key _ -> thing = key) ps
    let remaining = fst item.[thing] - 1.
    let recount =
        if remaining = 0. then normalize rest
        else normalize (rest.Add(thing, (remaining, 0.)))
    item.[thing], recount
/// Scales the pairing probability `p` by a factor that depends on the two genotypes:
/// 1 if either parent is homozygous dominant, 0.75 for two heterozygous parents,
/// 0.5 for heterozygous with homozygous recessive, 0 for two homozygous recessive.
let mate p parents =
    match parents with
    | Homozygous Dominant, _
    | _, Homozygous Dominant -> p
    | Homozygous Recessive, Homozygous Recessive -> 0.
    | Heterozygous, Heterozygous -> 0.75 * p
    | Heterozygous, Homozygous Recessive
    | Homozygous Recessive, Heterozygous -> p * 0.5
let v = [|for x in pops do
let (_,p),r = takeOne x.Key pops
let al = snd x.Key
for y in r do
let (_,al2), (_,p2) = keyValueToPair y
yield al, al2, mate (p*p2) (al, al2) |]
v |> Array.sumBy third
//////////////////////////////////////
let combins = MathNet.Numerics.Combinatorics.Combinations
/// Binomial terms p^x * (1-p)^(total-x) * C(total, x) for x = atleast .. total.
let f total atleast p =
    let term x = p ** x * (1. - p) ** (total - x) * combins(int total,int x)
    [ for x in atleast .. total -> term x ]
/// Sum of the terms of `f` for 2^k trials, at least N successes and success
/// probability 0.25, rounded to three decimals.
let iall k N =
    let trials = 2. ** k
    let tail = f trials N 0.25 |> List.sum
    Math.Round(tail, 3)
iall 7. 32.
////////////////////////////////
// N-glycosylation motif N{P}[ST]{P}: N, anything but P, S or T, anything but P.
// Inside a character class '|' is a literal, so the class must be written [ST].
let regex = "N[^P][ST][^P]"
open System.Net
let wc = new System.Net.WebClient()
/// Splits a single FASTA record into (first line, all remaining lines concatenated).
/// Lines are separated by "\r\n" or "\n"; empty lines are dropped. The first line
/// is returned unchanged, including any leading '>'.
let readFASTA2 (s:string) =
    let separators = [| "\r\n"; "\n" |]
    let dats = s.Split(separators, System.StringSplitOptions.RemoveEmptyEntries)
    let body = String.concat "" dats.[1..]
    dats.[0], body
let str = ">sp|P10761|ZP3_MOUSE Zona pellucida sperm-binding protein 3 OS=Mus musculus GN=Zp3 PE=1 SV=4
MASSYFLFLCLLLCGGPELCNSQTLWLLPGGTPTPVGSSSPVKVECLEAELVVTVSRDLFGTGKLV
QPGDLTLGSEGCQPRVSVDTDVVRFNAQLHECSSRVQMTKDALVYSTFLLHDPRPVSGLSILRTNR
VEVPIECRYPRQGNVSSHPIQPTWVPFRATVSSEEKLAFSLRLMEENWNTEKSAPTFHLGEVAHLQ
AEVQTGSHLPLQLFVDHCVATPSPLPDPNSSPYHFIVDFHGCLVDGLSESFSAFQVPRPRPETLQF
TVDVFHFANSSRNTLYITCHLKVAPANQIPDKLNKACSFNKTSQSWLPVEGDADICDCCSHGNCSN
SSSSQFQIHGPRQWSKLVSRNRRHVTDEADVTVGPLIFLGKANDQTVEGWTASAQTSVALGLGLAT
VAFLTLAAIVLAVTRKCHSSSYLVSLPQ"
/// Collects `f m` for every match `m` of `rx` in `s`, allowing overlaps: after a
/// match the search resumes one character past its start.
/// Results are returned most recent match first.
let motifs2 f (s:string) (rx:Regex) =
    let rec collect found start =
        let m = rx.Match(s, start)
        if m.Value = "" then
            // A failed match ends the search; a successful empty match retries at
            // the same position, exactly as before.
            if m.Success then collect found start else found
        else
            collect (f m :: found) (m.Index + 1)
    collect [] 0
let rx = Regex(regex)
/// Downloads the FASTA record for identifier `p` (trimmed) with the shared WebClient
/// `wc`, finds all matches of `rx` in its sequence via `motifs2`, prints the 1-based
/// match positions in ascending order and returns them together with `p`.
let findMotif (p:string) =
    let url = sprintf "http://www.uniprot.org/uniprot/%s.fasta" (p.Trim())
    let _, dna = wc.DownloadString(url) |> readFASTA2
    let ccv = motifs2 (fun x -> x.Index + 1) dna rx |> List.rev
    printfn "%A" ccv
    p, ccv
let dats =
@"Q0IBF4
Q8LCP6
P12923
Q9D9T0
Q706D1
P05155_IC1_HUMAN
P00743_FA10_BOVIN
P09791_PARB_TRYBB
A1JP66
P06870_KLK1_HUMAN
P07204_TRBM_HUMAN
Q9LHF1
P00304_ARA3_AMBEL
P02974_FMM1_NEIGO
P02186".Split('\n') |> Array.map findMotif
|> Array.filter (snd >> List.length >> (<>) 0)
|> Array.map (fun (id,inds) -> sprintf "%s\n%s" id (String.Join(" ", inds)))
String.Join("\n",dats)
/////////////////////////////
let a = [19117.; 19625.; 17122.; 17379.; 16109.; 18020.] |> List.mapi (fun i x -> let p = match i with | j when j <= 2 -> 1. | 3 -> 0.75 | 4 -> 0.5 | _ -> 0.
2. * x * p )
a |> List.sum
////////////////////////////
/// Product of the numbers that are in 2..n but not in 2..(n - k), computed in
/// floating point; for integral arguments with n - k >= 1 this is n! / (n - k)!.
let partialp2 n k =
    let numerator = Set.ofList [2.0 .. n]
    let denominator = Set.ofList [2.0 .. (n - k)]
    (numerator - denominator)
    |> Set.fold (fun product factor -> product * factor) 1.
Numerics.BigInteger(partialp2 84. 9.) % 1000000I
let na = "ACGATACAA" |> Seq.fold (fun x c -> x + log10 (match c with | 'G' | 'C' -> 0.129/2. | _ -> (1. - 0.129)/2.)) 0.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment