Last active
February 22, 2016 19:49
-
-
Save SpaceAntelope/24733ce6912aa86cc4f0 to your computer and use it in GitHub Desktop.
DNA Shotgun solution for /r/dailyprogrammer (https://www.reddit.com/r/dailyprogrammer/comments/46km7n/20160219_challenge_254_hard_dna_shotgun_sequencing/)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.Collections | |
open System.Collections.Generic | |
let inline (<+>) (left : string) (right : string) = | |
if left.Contains(right) then left | |
elif right.Contains(left) then right | |
else | |
let nLeft = left.Length | |
let rec merge depth = | |
if depth = 0 then "" | |
elif left.Substring(nLeft - depth) <> right.[..depth - 1] then merge (depth - 1) | |
elif depth > 0 then left + right.[depth..] | |
else "" | |
merge (System.Math.Min(left.Length, right.Length)) | |
type SearchResultKind = | |
| Backtrack | |
| Stop | |
| Finished | |
type Node = | |
{ ThreadID : int | |
LastAdded : string | |
Path : string | |
RemainingSequences : int [] } | |
let mutable InputIndex = [| "" |] | |
let ExpandNode(current : Node) = | |
current.RemainingSequences | |
|> Array.map (fun i -> | |
let remaining = current.RemainingSequences |> Array.where (fun i' -> i <> i') | |
let candidate = InputIndex.[i] | |
let baseNode = | |
{ ThreadID = current.ThreadID | |
LastAdded = candidate | |
Path = current.Path | |
RemainingSequences = remaining } | |
let fromRight = current.Path <+> candidate | |
let fromLeft = candidate <+> current.Path | |
match fromRight, fromLeft with | |
| "", "" -> [| None |] | |
| "", _ -> [| Some { baseNode with Path = fromLeft } |] | |
| _, "" -> [| Some { baseNode with Path = fromRight } |] | |
| _, _ when fromRight.Length < fromLeft.Length -> [| Some { baseNode with Path = fromRight } |] | |
| _, _ when fromRight.Length > fromLeft.Length -> [| Some { baseNode with Path = fromLeft } |] | |
| _, _ when fromRight.Length = fromLeft.Length && fromRight = fromLeft -> [| Some { baseNode with Path = fromRight } |] | |
| _, _ when fromRight.Length = fromLeft.Length && fromRight <> fromLeft -> | |
[| Some { baseNode with Path = fromRight } | |
Some { baseNode with Path = fromLeft } |] | |
| _, _ -> [| None |]) | |
|> Array.collect (fun child -> child) // Flatten array of arrays | |
|> Array.choose (fun child -> child) // Filter out None results | |
|> Array.sortBy (fun child -> child.Path.Length) // Apply best first heuristic | |
let mutable stopRunning = false | |
let printLock = ref 0L | |
let (|ExpectedResult|UnexpectedOptimalResult|SuboptimalResult|SuperoptimalResult|) ((node : Node),expectedResult) = | |
if node.Path = expectedResult then ExpectedResult | |
elif node.Path.Length = expectedResult.Length then UnexpectedOptimalResult | |
elif node.Path.Length > expectedResult.Length then SuboptimalResult | |
else SuperoptimalResult | |
let ShotgunSearch (input : string list) (expectedResult : string) = | |
let sw = System.Diagnostics.Stopwatch.StartNew() | |
let rec DFS(node : Node) = | |
if stopRunning = true then Stop | |
//elif node.Path.Length > expectedResult.Length then () // Uncomment to cheat and prune when current length exceeds expected solution length | |
else | |
match node.RemainingSequences with | |
| [||] -> lock printLock (fun () -> | |
printfn "" | |
printfn "Thread ID: %d" node.ThreadID | |
printfn "Solution reached in %d:%d:%d" (sw.ElapsedMilliseconds / 60000L) ((sw.ElapsedMilliseconds % 60000L) / 1000L) | |
((sw.ElapsedMilliseconds % 1000L)) | |
match node, expectedResult with | |
| ExpectedResult -> | |
printfn "Finished with expected result [%s] at length (%d)" node.Path node.Path.Length | |
stopRunning <- true | |
Finished | |
| UnexpectedOptimalResult -> printfn "Finished with unexpected but optimal result [%s] at length (%d)" node.Path node.Path.Length; | |
Backtrack | |
| SuboptimalResult -> printfn "Finished with suboptimal result [%s] at length (%d (optimal: %d)" node.Path node.Path.Length expectedResult.Length; | |
Backtrack | |
| SuperoptimalResult -> printfn "!!! Finished with unexpected but superoptimal result [%s] at length (%d (optimal: %d)" node.Path node.Path.Length expectedResult.Length; | |
Backtrack) | |
| _ -> if ExpandNode node |> Array.map (DFS) |> Array.exists (fun item -> item = Finished) then Finished | |
else Backtrack | |
InputIndex <- input |> Array.ofList | |
input | |
|> Seq.mapi (fun i sequence -> | |
async { | |
lock printLock (fun () -> printfn "Thread %d: Starting search from %s" i sequence) | |
{ ThreadID = i | |
LastAdded = sequence | |
Path = sequence | |
RemainingSequences = [| 0..(input.Length - 1) |] |> Array.where (fun i' -> i <> i') } | |
|> DFS |> (fun result -> lock printLock (fun () -> printfn "Thread %d: %A" i result)) | |
}) | |
|> Async.Parallel | |
|> Async.RunSynchronously | |
|> ignore | |
let input = [ "tgca"; "taggcta"; "gtcatgcttaggcta"; "agcatgctgcag"; "tcatgct" ] | |
let expectedResult = "agcatgctgcagtcatgcttaggcta" | |
let input' = | |
@"gatccatctggatcctatagttcatggaaagccgctgc | |
tatttcaacattaattgttggttttgatacagatggtacacca | |
aaaagaaattcaaaaagaacaagaaaaatctgaaaaacaacaaaa | |
ggaatgtcaatcctatagattaactgttgaagattcaccatcagttg | |
tggaaataaaaatattgaaattgcagtcattagaaataaacaac | |
tcaagtagaatatgccatggaagcagtaagaaaaggtactgttg | |
tgcaagatcaattagaaaaatcgttaaattagatgaccacatt | |
tgtcgttgaagctgaaaaagaaattcaaaaagaacaagaaaaatct | |
gaaaaacaacaaaaataaattacatcaaattccttttttt | |
caatcgttttattagatgaacaagaaattgataaattagttgc | |
aatctttatcaaactgatccatctggatcctatagttcatg | |
gaaattgcagtcattagaaataaacaaccaatcgttttattagatg | |
atcgttaaattagatgaccacatttgtttaacctttgctggt | |
aattatacagacgttagtgaagaggaatcaattaaattagcagtta | |
tatactcaaagtggtggtgttagaccatttggtatttcaacattaat | |
ttttaggtgttgaaaagaaagcaaccgctaaacttcaaga | |
aagaaagcaaccgctaaacttcaagatgcaagatcaattagaaaa | |
ccccacctttttttttaattatcttcaagtttttttaaaaaaaaaaaaaaaa | |
gaatttttagaaaagaattatacagacgttagtgaagaggaatc | |
agtgcaagatacgatagagcaattacagttttctcaccagatg | |
aattaaattagcagttagagctttattagagattgttgaaag | |
cagttggtgtacgtggtaaagatgttattgttttaggtgttgaa | |
ttcaacaacgttatactcaaagtggtggtgttagaccatttgg | |
ataaattacatcaaattcctttttttccccacctttttttt | |
aattggtcgtagttcaaagagtgttggtgaatttttagaaaag | |
aatatatttctaaatttattgctggtattcaacaacgt | |
aacaagaaattgataaattagttgctgtcgttgaagctga | |
gagctttattagagattgttgaaagtggaaataaaaatatt | |
ttaactgccgattcacgtgtattaattagtaaagcattaat | |
acgatagagcaattacagttttctcaccagatggtcatctttt | |
aaggtactgttgcagttggtgtacgtggtaaagatgttattg | |
tgtttaacctttgctggtttaactgccgattcacgtgtattaatt | |
aataatataatatatatataaatacataataatgtcaagtgcaagat | |
agtaaagcattaatggaatgtcaatcctatagattaactgt | |
tgaagattcaccatcagttgaatatatttctaaatttattgctggta | |
gaaagccgctgcaattggtcgtagttcaaagagtgttggt | |
gtcatctttttcaagtagaatatgccatggaagcagtaagaa | |
tgttggttttgatacagatggtacaccaaatctttatcaaact".Split('\n') | |
|> List.ofArray | |
|> List.map (fun seq -> seq.Trim()) | |
let expectedResult' = | |
"aataatataatatatatataaatacataataatgtcaagtgcaagatacgatagagcaattacagttttctcaccagatggtcatctttttcaagtagaatatgccatggaagcagtaagaaaaggtactgttgcagttggtgtacgtggtaaagatgttattgttttaggtgttgaaaagaaagcaaccgctaaacttcaagatgcaagatcaattagaaaaatcgttaaattagatgaccacatttgtttaacctttgctggtttaactgccgattcacgtgtattaattagtaaagcattaatggaatgtcaatcctatagattaactgttgaagattcaccatcagttgaatatatttctaaatttattgctggtattcaacaacgttatactcaaagtggtggtgttagaccatttggtatttcaacattaattgttggttttgatacagatggtacaccaaatctttatcaaactgatccatctggatcctatagttcatggaaagccgctgcaattggtcgtagttcaaagagtgttggtgaatttttagaaaagaattatacagacgttagtgaagaggaatcaattaaattagcagttagagctttattagagattgttgaaagtggaaataaaaatattgaaattgcagtcattagaaataaacaaccaatcgttttattagatgaacaagaaattgataaattagttgctgtcgttgaagctgaaaaagaaattcaaaaagaacaagaaaaatctgaaaaacaacaaaaataaattacatcaaattcctttttttccccacctttttttttaattatcttcaagtttttttaaaaaaaaaaaaaaaa" | |
let result = ShotgunSearch input' expectedResult' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment