Skip to content

Instantly share code, notes, and snippets.

@latkin
Last active November 13, 2019 09:06
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save latkin/c04dc782a6fb34f4fae3 to your computer and use it in GitHub Desktop.
Save latkin/c04dc782a6fb34f4fae3 to your computer and use it in GitHub Desktop.
Seq perf
open System
open System.Collections
open System.Collections.Generic
open System.Diagnostics
open System.Linq
module Algos =
//
// non-lazy solutions
//
let eager1 (value: string) =
let result = ResizeArray(value.Length)
for i in 0 .. value.Length - 1 do
if not (Char.IsLowSurrogate(value.[i]))
then result.Add(Char.ConvertToUtf32(value, i))
result.ToArray()
let eager2 (value: string) =
let result = ResizeArray(value.Length)
for i in 0 .. value.Length - 1 do
if not (Char.IsLowSurrogate(value.[i]))
then result.Add(Char.ConvertToUtf32(value, i))
(result.ToArray()) :> seq<_>
let eager3 (value: string) =
let result = ResizeArray(value.Length)
for i in 0 .. value.Length - 1 do
if not (Char.IsLowSurrogate(value.[i]))
then result.Add(Char.ConvertToUtf32(value, i))
// ToArray() causes another copy to be generated.
// Avoiding that is a win in large-input scenarios, but at a cost
// of otherwise slower processing
(result) :> seq<_>
//
// lazy solutions
//
let lazy0(value: string) =
seq {
let i = ref 0
while !i < value.Length do
if not (Char.IsLowSurrogate(value.[!i]))
then yield Char.ConvertToUtf32(value, !i)
incr i
}
let lazy1(value: string) =
seq {
for i = 0 to value.Length - 1 do
if not (Char.IsLowSurrogate(value.[i]))
then yield Char.ConvertToUtf32(value, i)
}
let lazy2(value: string) =
value
|> Seq.mapi (fun i c ->
if(Char.IsLowSurrogate(c)) then 0
else Char.ConvertToUtf32(value, i))
|> Seq.filter (fun x -> x <> 0)
let lazy3(value: string) =
let inline mapiAndFilter f g e =
seq {
let i = ref 0
for elem in e do
if f elem then yield g !i
incr i
}
value.AsEnumerable()
|> mapiAndFilter (not << Char.IsLowSurrogate) (fun i -> Char.ConvertToUtf32 (value, i))
let lazy4 (value : string) =
let mapiAndFilter f g (e : seq<'a>) : seq<'b> =
let i = ref -1
let e = e.GetEnumerator()
let inline getEnum() =
{ new IEnumerator<'b> with
member __.Current = g !i
interface System.Collections.IEnumerator with
member __.Current = box (g !i)
member __.MoveNext() =
let rec next() =
incr i
e.MoveNext() && (f e.Current || next())
next()
member __.Reset() = failwith "reset"
interface IDisposable with
member __.Dispose() = e.Dispose() }
{ new IEnumerable<'b> with
member __.GetEnumerator() = getEnum()
interface IEnumerable with
member __.GetEnumerator() = getEnum() :> IEnumerator }
value.AsEnumerable()
|> mapiAndFilter (not << Char.IsLowSurrogate) (fun i -> Char.ConvertToUtf32(value, i))
// integrate all logic directly
// avoids need to use System.String enumerator
// avoids all recursion
// very fast!
let lazy5 (value : string) =
let inline getEnum() =
let i = ref -1
{ new IEnumerator<int> with
member __.Current = Char.ConvertToUtf32(value, !i)
interface System.Collections.IEnumerator with
member __.Current = box (Char.ConvertToUtf32(value, !i))
member __.MoveNext() =
incr i
if !i >= value.Length then false else
if not (Char.IsLowSurrogate(value.[!i])) then true else
incr i
!i < value.Length
member __.Reset() = failwith "reset"
interface IDisposable with
member __.Dispose() = () }
{ new IEnumerable<int> with
member __.GetEnumerator() = getEnum()
interface IEnumerable with
member __.GetEnumerator() = getEnum() :> IEnumerator }
module Timing =
let timeit f m =
GC.Collect()
GC.WaitForPendingFinalizers()
Stopwatch.StartNew() |> fun sw -> (f (), sw.Elapsed.TotalSeconds, m)
let rankTimings fs =
let results =
fs
|> List.map (fun (f, m) -> timeit f m)
|> List.sortBy (fun (_,t,_) -> t)
let (_, baseline, _) = results.Head
results |> List.iter (fun (_, t, m) -> printfn "%10s %6f %6f" m t (t/baseline))
let input = "ab\U0001ABCDcde\U0001ABCEfghi\U0001ABCF"
let run f = for _ in 1 .. 1000000 do f input |> Seq.length |> ignore
printfn "**run**"
Timing.rankTimings [
(fun _ -> run Algos.eager1), "eager1"
(fun _ -> run Algos.eager2), "eager2"
(fun _ -> run Algos.eager3), "eager3"
(fun _ -> run Algos.lazy0), "lazy0"
(fun _ -> run Algos.lazy1), "lazy1"
(fun _ -> run Algos.lazy2), "lazy2"
(fun _ -> run Algos.lazy3), "lazy3"
(fun _ -> run Algos.lazy4), "lazy4"
(fun _ -> run Algos.lazy5), "lazy5"
]
let runmap f = for _ in 1 .. 1000000 do f input |> Seq.map id |> Seq.length |> ignore
printfn "**runmap**"
Timing.rankTimings [
(fun _ -> runmap Algos.eager1), "eager1"
(fun _ -> runmap Algos.eager2), "eager2"
(fun _ -> runmap Algos.eager3), "eager3"
(fun _ -> runmap Algos.lazy0), "lazy0"
(fun _ -> runmap Algos.lazy1), "lazy1"
(fun _ -> runmap Algos.lazy2), "lazy2"
(fun _ -> runmap Algos.lazy3), "lazy3"
(fun _ -> runmap Algos.lazy4), "lazy4"
(fun _ -> runmap Algos.lazy5), "lazy5"
]
let runmapnth f = for _ in 1 .. 1000000 do f input |> Seq.map id |> Seq.nth 6 |> ignore
printfn "**runmapnth**"
Timing.rankTimings [
(fun _ -> runmapnth Algos.eager1), "eager1"
(fun _ -> runmapnth Algos.eager2), "eager2"
(fun _ -> runmapnth Algos.eager3), "eager3"
(fun _ -> runmapnth Algos.lazy0), "lazy0"
(fun _ -> runmapnth Algos.lazy1), "lazy1"
(fun _ -> runmapnth Algos.lazy2), "lazy2"
(fun _ -> runmapnth Algos.lazy3), "lazy3"
(fun _ -> runmapnth Algos.lazy4), "lazy4"
(fun _ -> runmapnth Algos.lazy5), "lazy5"
]
let biginput = String.replicate 262144 input
let runBig f = for _ in 1 .. 100 do f biginput |> Seq.length |> ignore
printfn "**runBig**"
Timing.rankTimings [
(fun _ -> runBig Algos.eager1), "eager1"
(fun _ -> runBig Algos.eager2), "eager2"
(fun _ -> runBig Algos.eager3), "eager3"
(fun _ -> runBig Algos.lazy0), "lazy0"
(fun _ -> runBig Algos.lazy1), "lazy1"
(fun _ -> runBig Algos.lazy2), "lazy2"
(fun _ -> runBig Algos.lazy3), "lazy3"
(fun _ -> runBig Algos.lazy4), "lazy4"
(fun _ -> runBig Algos.lazy5), "lazy5"
]
let runBigNth f = for _ in 1 .. 100 do f biginput |> Seq.map id |> Seq.nth 6 |> ignore
printfn "**runBigNth**"
Timing.rankTimings [
(fun _ -> runBigNth Algos.eager1), "eager1"
(fun _ -> runBigNth Algos.eager2), "eager2"
(fun _ -> runBigNth Algos.eager3), "eager3"
(fun _ -> runBigNth Algos.lazy0), "lazy0"
(fun _ -> runBigNth Algos.lazy1), "lazy1"
(fun _ -> runBigNth Algos.lazy2), "lazy2"
(fun _ -> runBigNth Algos.lazy3), "lazy3"
(fun _ -> runBigNth Algos.lazy4), "lazy4"
(fun _ -> runBigNth Algos.lazy5), "lazy5"
]

run

Algo time (s) multiplier
eager0 | 0.184025  | 1.000000
 lazy5 | 0.194004  | 1.054226
eager2 | 0.219321  | 1.191800
eager3 | 0.310223  | 1.685769
 lazy0 | 0.330531  | 1.796125
 lazy4 | 0.376372  | 2.045229
 lazy3 | 0.483981  | 2.629978
 lazy1 | 0.894039  | 4.858259
 lazy2 | 0.921062  | 5.005101

runmap

Algo time (s) multiplier
eager1 | 0.579327  | 1.000000
 lazy5 | 0.586707  | 1.012740
eager2 | 0.611499  | 1.055534
 lazy0 | 0.683276  | 1.179431
 lazy4 | 0.742995  | 1.282514
eager3 | 0.785129  | 1.355245
 lazy3 | 0.830547  | 1.433642
 lazy1 | 1.250160  | 2.157954
 lazy2 | 1.523762  | 2.630230

runmapnth

Algo time (s) multiplier
 lazy5 | 0.418695  | 1.000000
eager1 | 0.499834  | 1.193789
 lazy0 | 0.507692  | 1.212558
eager2 | 0.524433  | 1.252540
 lazy4 | 0.553255  | 1.321380
 lazy3 | 0.608237  | 1.452697
eager3 | 0.632419  | 1.510452
 lazy1 | 0.873706  | 2.086735
 lazy2 | 1.085998  | 2.593767

runBig

Algo time (s) multiplier
 lazy5 | 1.585626  | 1.000000
eager3 | 2.364669  | 1.491316
eager2 | 2.936370  | 1.851868
eager1 | 2.966829  | 1.871077
 lazy0 | 4.594467  | 2.897573
 lazy4 | 4.973711  | 3.136749
 lazy3 | 7.036134  | 4.437449
 lazy2 | 11.858302 |  7.478625
 lazy1 | 14.690765 |  9.264963

runBigNth

Algo time (s) multiplier
 lazy4 | 0.000270  | 1.000000
 lazy3 | 0.000296  | 1.099814
 lazy5 | 0.000306  | 1.135065
 lazy2 | 0.000352  | 1.305751
 lazy1 | 0.000453  | 1.679777
 lazy0 | 0.000624  | 2.314286
eager3 | 2.326347  | 8632.086085
eager2 | 2.832068  | 10508.601113
eager1 | 2.938856  | 10904.845640
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment