|
open System |
|
open System.Collections |
|
open System.Collections.Generic |
|
open System.Diagnostics |
|
open System.Linq |
|
|
|
module Algos = |
|
// |
|
// non-lazy solutions |
|
// |
|
|
|
let eager1 (value: string) = |
|
let result = ResizeArray(value.Length) |
|
for i in 0 .. value.Length - 1 do |
|
if not (Char.IsLowSurrogate(value.[i])) |
|
then result.Add(Char.ConvertToUtf32(value, i)) |
|
result.ToArray() |
|
|
|
let eager2 (value: string) = |
|
let result = ResizeArray(value.Length) |
|
for i in 0 .. value.Length - 1 do |
|
if not (Char.IsLowSurrogate(value.[i])) |
|
then result.Add(Char.ConvertToUtf32(value, i)) |
|
(result.ToArray()) :> seq<_> |
|
|
|
let eager3 (value: string) = |
|
let result = ResizeArray(value.Length) |
|
for i in 0 .. value.Length - 1 do |
|
if not (Char.IsLowSurrogate(value.[i])) |
|
then result.Add(Char.ConvertToUtf32(value, i)) |
|
// ToArray() causes another copy to be generated. |
|
// Avoiding that is a win in large-input scenarios, but at a cost |
|
// of otherwise slower processing |
|
(result) :> seq<_> |
|
|
|
// |
|
// lazy solutions |
|
// |
|
|
|
let lazy0(value: string) = |
|
seq { |
|
let i = ref 0 |
|
while !i < value.Length do |
|
if not (Char.IsLowSurrogate(value.[!i])) |
|
then yield Char.ConvertToUtf32(value, !i) |
|
incr i |
|
} |
|
|
|
let lazy1(value: string) = |
|
seq { |
|
for i = 0 to value.Length - 1 do |
|
if not (Char.IsLowSurrogate(value.[i])) |
|
then yield Char.ConvertToUtf32(value, i) |
|
} |
|
|
|
let lazy2(value: string) = |
|
value |
|
|> Seq.mapi (fun i c -> |
|
if(Char.IsLowSurrogate(c)) then 0 |
|
else Char.ConvertToUtf32(value, i)) |
|
|> Seq.filter (fun x -> x <> 0) |
|
|
|
let lazy3(value: string) = |
|
let inline mapiAndFilter f g e = |
|
seq { |
|
let i = ref 0 |
|
for elem in e do |
|
if f elem then yield g !i |
|
incr i |
|
} |
|
|
|
value.AsEnumerable() |
|
|> mapiAndFilter (not << Char.IsLowSurrogate) (fun i -> Char.ConvertToUtf32 (value, i)) |
|
|
|
let lazy4 (value : string) = |
|
let mapiAndFilter f g (e : seq<'a>) : seq<'b> = |
|
let i = ref -1 |
|
let e = e.GetEnumerator() |
|
|
|
let inline getEnum() = |
|
{ new IEnumerator<'b> with |
|
member __.Current = g !i |
|
interface System.Collections.IEnumerator with |
|
member __.Current = box (g !i) |
|
member __.MoveNext() = |
|
let rec next() = |
|
incr i |
|
e.MoveNext() && (f e.Current || next()) |
|
next() |
|
member __.Reset() = failwith "reset" |
|
interface IDisposable with |
|
member __.Dispose() = e.Dispose() } |
|
{ new IEnumerable<'b> with |
|
member __.GetEnumerator() = getEnum() |
|
interface IEnumerable with |
|
member __.GetEnumerator() = getEnum() :> IEnumerator } |
|
|
|
value.AsEnumerable() |
|
|> mapiAndFilter (not << Char.IsLowSurrogate) (fun i -> Char.ConvertToUtf32(value, i)) |
|
|
|
// integrate all logic directly |
|
// avoids need to use System.String enumerator |
|
// avoids all recursion |
|
// very fast! |
|
let lazy5 (value : string) = |
|
let inline getEnum() = |
|
let i = ref -1 |
|
{ new IEnumerator<int> with |
|
member __.Current = Char.ConvertToUtf32(value, !i) |
|
interface System.Collections.IEnumerator with |
|
member __.Current = box (Char.ConvertToUtf32(value, !i)) |
|
member __.MoveNext() = |
|
incr i |
|
if !i >= value.Length then false else |
|
if not (Char.IsLowSurrogate(value.[!i])) then true else |
|
incr i |
|
!i < value.Length |
|
member __.Reset() = failwith "reset" |
|
interface IDisposable with |
|
member __.Dispose() = () } |
|
{ new IEnumerable<int> with |
|
member __.GetEnumerator() = getEnum() |
|
interface IEnumerable with |
|
member __.GetEnumerator() = getEnum() :> IEnumerator } |
|
|
|
module Timing = |
|
let timeit f m = |
|
GC.Collect() |
|
GC.WaitForPendingFinalizers() |
|
Stopwatch.StartNew() |> fun sw -> (f (), sw.Elapsed.TotalSeconds, m) |
|
|
|
let rankTimings fs = |
|
let results = |
|
fs |
|
|> List.map (fun (f, m) -> timeit f m) |
|
|> List.sortBy (fun (_,t,_) -> t) |
|
let (_, baseline, _) = results.Head |
|
results |> List.iter (fun (_, t, m) -> printfn "%10s %6f %6f" m t (t/baseline)) |
|
|
|
let input = "ab\U0001ABCDcde\U0001ABCEfghi\U0001ABCF" |
|
|
|
let run f = for _ in 1 .. 1000000 do f input |> Seq.length |> ignore |
|
|
|
printfn "**run**" |
|
Timing.rankTimings [ |
|
(fun _ -> run Algos.eager1), "eager1" |
|
(fun _ -> run Algos.eager2), "eager2" |
|
(fun _ -> run Algos.eager3), "eager3" |
|
(fun _ -> run Algos.lazy0), "lazy0" |
|
(fun _ -> run Algos.lazy1), "lazy1" |
|
(fun _ -> run Algos.lazy2), "lazy2" |
|
(fun _ -> run Algos.lazy3), "lazy3" |
|
(fun _ -> run Algos.lazy4), "lazy4" |
|
(fun _ -> run Algos.lazy5), "lazy5" |
|
] |
|
|
|
let runmap f = for _ in 1 .. 1000000 do f input |> Seq.map id |> Seq.length |> ignore |
|
|
|
printfn "**runmap**" |
|
Timing.rankTimings [ |
|
(fun _ -> runmap Algos.eager1), "eager1" |
|
(fun _ -> runmap Algos.eager2), "eager2" |
|
(fun _ -> runmap Algos.eager3), "eager3" |
|
(fun _ -> runmap Algos.lazy0), "lazy0" |
|
(fun _ -> runmap Algos.lazy1), "lazy1" |
|
(fun _ -> runmap Algos.lazy2), "lazy2" |
|
(fun _ -> runmap Algos.lazy3), "lazy3" |
|
(fun _ -> runmap Algos.lazy4), "lazy4" |
|
(fun _ -> runmap Algos.lazy5), "lazy5" |
|
] |
|
|
|
let runmapnth f = for _ in 1 .. 1000000 do f input |> Seq.map id |> Seq.nth 6 |> ignore |
|
|
|
printfn "**runmapnth**" |
|
Timing.rankTimings [ |
|
(fun _ -> runmapnth Algos.eager1), "eager1" |
|
(fun _ -> runmapnth Algos.eager2), "eager2" |
|
(fun _ -> runmapnth Algos.eager3), "eager3" |
|
(fun _ -> runmapnth Algos.lazy0), "lazy0" |
|
(fun _ -> runmapnth Algos.lazy1), "lazy1" |
|
(fun _ -> runmapnth Algos.lazy2), "lazy2" |
|
(fun _ -> runmapnth Algos.lazy3), "lazy3" |
|
(fun _ -> runmapnth Algos.lazy4), "lazy4" |
|
(fun _ -> runmapnth Algos.lazy5), "lazy5" |
|
] |
|
|
|
let biginput = String.replicate 262144 input |
|
let runBig f = for _ in 1 .. 100 do f biginput |> Seq.length |> ignore |
|
|
|
printfn "**runBig**" |
|
Timing.rankTimings [ |
|
(fun _ -> runBig Algos.eager1), "eager1" |
|
(fun _ -> runBig Algos.eager2), "eager2" |
|
(fun _ -> runBig Algos.eager3), "eager3" |
|
(fun _ -> runBig Algos.lazy0), "lazy0" |
|
(fun _ -> runBig Algos.lazy1), "lazy1" |
|
(fun _ -> runBig Algos.lazy2), "lazy2" |
|
(fun _ -> runBig Algos.lazy3), "lazy3" |
|
(fun _ -> runBig Algos.lazy4), "lazy4" |
|
(fun _ -> runBig Algos.lazy5), "lazy5" |
|
] |
|
|
|
let runBigNth f = for _ in 1 .. 100 do f biginput |> Seq.map id |> Seq.nth 6 |> ignore |
|
|
|
printfn "**runBigNth**" |
|
Timing.rankTimings [ |
|
(fun _ -> runBigNth Algos.eager1), "eager1" |
|
(fun _ -> runBigNth Algos.eager2), "eager2" |
|
(fun _ -> runBigNth Algos.eager3), "eager3" |
|
(fun _ -> runBigNth Algos.lazy0), "lazy0" |
|
(fun _ -> runBigNth Algos.lazy1), "lazy1" |
|
(fun _ -> runBigNth Algos.lazy2), "lazy2" |
|
(fun _ -> runBigNth Algos.lazy3), "lazy3" |
|
(fun _ -> runBigNth Algos.lazy4), "lazy4" |
|
(fun _ -> runBigNth Algos.lazy5), "lazy5" |
|
] |