Skip to content

Instantly share code, notes, and snippets.

@cartermp
Last active December 29, 2020 00:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cartermp/a56eb071fa40dfb3568a7a41cda0bd11 to your computer and use it in GitHub Desktop.
Save cartermp/a56eb071fa40dfb3568a7a41cda0bd11 to your computer and use it in GitHub Desktop.
open System.Threading.Tasks
open System.Text.RegularExpressions
/// Builds a `Regex` for the pattern `s` with `RegexOptions.Compiled` set.
let regex s =
    let options = RegexOptions.Compiled
    new Regex(s, options)
/// Full contents of the benchmark input file, which is looked up in the
/// directory containing this source file.
let input =
    let path = $"{__SOURCE_DIRECTORY__}/regexredux-input5000000.txt"
    System.IO.File.ReadAllText(path)

/// `input` with every match of `>.*\n|\n` removed: the text from a `>` up to
/// and including the newline that ends its line, plus all other newlines.
let text =
    let stripper = regex ">.*\n|\n"
    stripper.Replace(input, "")
/// Counts the successive matches of `pattern` in `text` and returns the
/// result formatted as "<pattern> <count>".
let regexCount pattern text =
    let mutable hits = 0
    let mutable current = (regex pattern).Match text
    // Walk the match chain until the first unsuccessful match.
    while current.Success do
        hits <- hits + 1
        current <- current.NextMatch()
    $"{pattern} {hits}"
/// Task that applies the substitution rules below to `text`, in order, and
/// yields a string listing the lengths of `input`, `text` and the rewritten
/// text, each preceded by a newline.
let replaceTask =
    // (pattern, replacement) pairs; each rule runs on the previous rule's output.
    let substitutions =
        [ "tHa[Nt]", "<4>"
          "aND|caN|Ha[DS]|WaS", "<3>"
          "a[NSt]|BY", "<2>"
          "<[^>]*>", "|"
          "\\|[^|][^|]*\\|", "-" ]

    // Fold step: replace every match of one rule's pattern in the current text.
    let applyRule (current: string) (pattern: string, replacement: string) =
        (regex pattern).Replace(current, replacement)

    Task.Run(fun _ ->
        let rewritten = List.fold applyRule text substitutions
        $"\n{input.Length}\n{text.Length}\n{rewritten.Length}")
/// One "<pattern> <count>" string per variant pattern, in the order listed,
/// produced by running `regexCount` against `text` via `Array.Parallel.map`.
let counts =
    let variants =
        [| "agggtaaa|tttaccct"
           "[cgt]gggtaaa|tttaccc[acg]"
           "a[act]ggtaaa|tttacc[agt]t"
           "ag[act]gtaaa|tttac[agt]ct"
           "agg[act]taaa|ttta[agt]cct"
           "aggg[acg]aaa|ttt[cgt]ccct"
           "agggt[cgt]aa|tt[acg]accct"
           "agggta[cgt]a|t[acg]taccct"
           "agggtaa[cgt]|[acg]ttaccct" |]

    Array.Parallel.map (fun variant -> regexCount variant text) variants
// Write each per-pattern count line, then the length summary produced by
// the replace task (reading .Result waits for that task to finish).
counts |> Array.iter (fun (line: string) -> stdout.WriteLine line)
stdout.WriteLine replaceTask.Result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment