Skip to content

Instantly share code, notes, and snippets.

/EoSMWFvsSoA Secret
Created Jan 16, 2015

Embed
What would you like to do?
module SoEMWF =
let getName = "\r\nUses the Sieve of Eratosthenes to calculate the primes to argument.\r\n"
let private WRDBTS, FRSTBP = 16, 23UL
let private BPRMS = [| 2UL; 3UL; 5UL; 7UL; 11UL; 13UL; 17UL; 19UL |]
let private BWPS = [| //base wheel positions + 23
23uy;29uy;31uy;37uy;41uy;43uy;47uy;53uy; 59uy;61uy;67uy;71uy;73uy;79uy;83uy;89uy;
97uy;101uy;103uy;107uy;109uy;113uy;121uy;127uy; 131uy;137uy;139uy;143uy;149uy;151uy;157uy;163uy;
167uy;169uy;173uy;179uy;181uy;187uy;191uy;193uy; 197uy;199uy;209uy;211uy;221uy;223uy;227uy;229uy |]
let private GAPS = [| //wheel position deltas for each computation of next incremental cull position
6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 6uy; 6uy; 2uy; 6uy; 4uy; 2uy; 6uy; 4uy; 6uy; 8uy;
4uy; 2uy; 4uy; 2uy; 4uy; 8uy; 6uy; 4uy; 6uy; 2uy; 4uy; 6uy; 2uy; 6uy; 6uy; 4uy;
2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 10uy; 2uy; 10uy; 2uy; 4uy; 2uy; 4uy;
6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 6uy; 6uy; 2uy; 6uy; 4uy; 2uy; 6uy; 4uy; 6uy; 8uy; //2 rounds for overflow
4uy; 2uy; 4uy; 2uy; 4uy; 8uy; 6uy; 4uy; 6uy; 2uy; 4uy; 6uy; 2uy; 6uy; 6uy; 4uy;
2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 10uy; 2uy; 10uy; 2uy; 4uy; 2uy; 4uy |]
let private WHTS, WCRC = BWPS.Length, (GAPS |> Seq.map uint64 |> Seq.sum) / 2UL //check: 48 and 210UL
let private WHLNDXS = //look up table for (position - 23) mod WCRC - 0 to WCRC - 1, to wheel index - 0 to WHTS - 1
(GAPS |> Seq.take WHTS |> Seq.mapi (fun i x -> i, x)
|> Seq.collect (fun (i, n) -> seq { for n = 1uy to n do yield byte i })) |> Seq.toArray
let primes topNum =
if topNum >= ((1UL <<< 30) / 3UL - 1UL) * uint64 WCRC + FRSTBP then failwith "Argument too large for algorithm!!!"
let BFSZ = if topNum < FRSTBP then 6 else ((int ((topNum - FRSTBP) / uint64 WCRC) + 2) * 3) //includes topNum
let cull (cmpsts: _ array) (bps: _ seq) =
let topNdx = uint64 (cmpsts.Length * WRDBTS - 1)
let topRng = topNdx / uint64 WHTS * WCRC + uint64 BWPS.[WHTS - 1] //from size of current array
let kSqrtLim = int (sqrt (float (topRng - WCRC))) / int WCRC //base prime test limit value comp for extra
bps |> Seq.takeWhile (((>=) kSqrtLim) << fst) |> Seq.iter (fun (k, i) -> //for each base prime less than limit
let x = uint64 BWPS.[int i] in let p = (uint64 k * WCRC) + x
let s = p * p - FRSTBP - x * p in let pm = 3 * int p
{ int i .. int i + WHTS - 1 } |> Seq.map (fun i ->
let adj, ni = if i >= WHTS then WCRC, i - WHTS else 0UL, i
let c0 = s + p * (uint64 BWPS.[ni] + adj)
let c0d = c0 / WCRC in let c0n = WHLNDXS.[int (c0 - c0d * WCRC)]
let c0dwtry = 3 * int c0d
if c0n < 16uy then c0dwtry, c0n else if c0n < 32uy then c0dwtry + 1, c0n - 16uy
else c0dwtry + 2, c0n - 32uy )
|> Seq.takeWhile (((>) cmpsts.Length) << fst) //loop maximum of wheel hits times; run tight cull loop for each
|> Seq.iter (fun (ccd, cm) -> //tight culling loops almost all the time, following faster when 32-bit mode
let cmsk = 1us <<< int cm
let inline setCmpst i = cmpsts.[i] <- cmpsts.[i] ||| cmsk
let rec cullp c =
if c < cmpsts.Length then setCmpst c; cullp (c + pm) in cullp ccd) ) //fast for 64 bit mode
let cmpsts = //first generate the small master pre-culled pattern array
let mstr = Array.zeroCreate (11 * 13 * 17 * 19 * WHTS / WRDBTS)
{ WHTS - 4 .. WHTS - 1 } |> Seq.map (fun m -> (-1, byte m)) |> cull mstr //cull 11/13/17/19
let arr = Array.zeroCreate BFSZ
{ 0 .. mstr.Length .. (arr.Length - 1) } //copy repetitions of pre-cull pattern array until full
|> Seq.iter (fun i -> let len = if i + mstr.Length <= arr.Length
then mstr.Length else arr.Length - i
System.Array.Copy(mstr, 0, arr, i, len))
arr //has an slight excess of up to two wheels to come out even in int's/wheels
let inline testCmptst ndx = cmpsts.[ndx >>> 4] &&& (1us <<< (ndx &&& 15)) <> 0us
Seq.initInfinite id |> Seq.collect (fun k -> { 0 .. WHTS - 1 } |> Seq.map (fun i -> k, byte i))
|> Seq.filter (fun (k, i) -> not (testCmptst (WHTS * k + int i))) |> cull cmpsts //cull all composites
//an extremely fast bit counting routine by 16-bit words...
//mask off all buffer composite bits above the top number:
let topd = (topNum - FRSTBP) / WCRC in let topn = int WHLNDXS.[int (topNum - FRSTBP - topd * WCRC)]
let trywrd = int topd * 3
let wrd, wrdn = if topn < 16 then trywrd, topn else if topn < 32 then trywrd + 1, topn - 16
else trywrd + 2, topn - 32
let awrd = if topNum < FRSTBP then -1 else cmpsts.[wrd] <- cmpsts.[wrd] ||| (uint16 -2 <<< wrdn); wrd
{ awrd + 1 .. cmpsts.Length - 1 } |> Seq.iter (fun w -> cmpsts.[w] <- uint16 -1)
let rec count w acc = //tight count loop
if w < cmpsts.Length then //magic int16 bit count algorithm - slightly slower than Look Up Table for int16's
let a = cmpsts.[w] in let b = a - ((a >>> 1) &&& 0x5555us)
let c = (b &&& 0x3333us) + ((b >>> 2) &&& 0x3333us)
count (w + 1) (acc - uint64 ((((c + (c >>> 4)) &&& 0x0F0Fus) * 0x0101us) >>> 8))
else acc in count 0 (uint64 (BPRMS |> Seq.takeWhile ((>=) topNum) |> Seq.length)) + //include base primes
uint64 cmpsts.Length * uint64 WRDBTS //counts bits not set
// seq { yield! BPRMS; |> Seq.takeWhile ((>=) topNum) //very slow but literal way of enumerating resulting primes:
// yield! Seq.initInfinite id
// |> Seq.collect (fun k -> BWPS |> Seq.mapi (fun i x -> WCRC * uint64 k + uint64 x, WHTS * k + i))
// |> Seq.takeWhile (((>=) topNum) << fst)
// |> Seq.filter (not << testCmptst << snd) |> Seq.map (fst) }
module SoA =
let getName = "\r\nUses the Sieve of Atkin to calculate the primes to argument.\r\n"
let private WRDBTS, FRSTBP = 16, 7UL
let private BPRMS = [| 2UL; 3UL; 5UL |]
let private BWPS = [| 7uy;11uy;13uy;17uy;19uy;23uy;29uy;31uy; //base wheel positions + 7
37uy;41uy;43uy;47uy;49uy;53uy;59uy;61uy |]
let private GAPS = [| //wheel position deltas for each computation of next incremental cull position
4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; //2 times for overflow
4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy |]
let private WHTS, WCRC = BWPS.Length, (GAPS |> Seq.map uint64 |> Seq.sum) / 2UL //check: 16 and 60UL
let private WHLNDXS = //look up table for (position - 23) mod WCRC - 0 to WCRC - 1, to wheel index - 0 to WHTS - 1
(GAPS |> Seq.take WHTS |> Seq.mapi (fun i x -> i, x)
|> Seq.collect (fun (i, n) -> seq { for n = 1uy to n do yield byte i })) |> Seq.toArray
let primes topNum =
if topNum >= ((1UL <<< 30) - 1UL) * uint64 WCRC + FRSTBP then failwith "Argument too large for algorithm!!!"
let BFSZ = if topNum < FRSTBP then 2 else int ((topNum - FRSTBP) / WCRC) + 2 //includes topNum with spare
let is_prms = //initialize the prime sieving array
Array.zeroCreate BFSZ //has an slight excess of the rest of the last wheel to come out even in int's/wheels
let setprms (prms: _ array) (bps: _ seq) =
let topNdx = uint64 (prms.Length * WRDBTS - 1)
let topRng = topNdx / uint64 WHTS * WCRC + uint64 BWPS.[WHTS - 1] //maximum value of current array
let kSqrtLim = int (sqrt (float (topRng - WCRC))) / int WCRC //compensate for extra wheel
//run the 4 * x * x + y * y quadratic prime generating sequences = all x's and odd y's
Seq.initInfinite (uint64 << ((+) 1)) |> Seq.map (fun x -> 4UL * x * x + 1UL)
|> Seq.takeWhile ((>=) topRng)
|> Seq.collect (fun st -> { 1UL .. 2UL .. 29UL } |> Seq.map (fun midy -> st + midy * midy - 1UL, int midy + 15)
|> Seq.takeWhile (((>=) topRng) << fst) )
|> Seq.iter (fun (n, myp15) ->
if n < FRSTBP then () else //no action for n = 5 - not one of the modulos for this quadratic
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC
let nm = int (if cr < WCRC then cr else cr - WCRC)
if 1UL <<< nm &&& 0x22022020022002UL = 0UL then () else //if nm in {1,13,17,29,37,41,49,53}
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it
let rec tgl c inc = if c < prms.Length then prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc + 30)
tgl (int cbd) myp15)
//run the 3 * x * x + y * y quadratic prime generating sequences - odd x's and even y's
Seq.initInfinite (uint64 << ((+) 1) << ((*) 2)) |> Seq.map (fun x -> 3UL * x * x + 4UL)
|> Seq.takeWhile ((>=) topRng)
|> Seq.collect (fun st -> { 2UL .. 2UL .. 30UL } |> Seq.map (fun midy -> st + midy * midy - 4UL, int midy + 15)
|> Seq.takeWhile (((>=) topRng) << fst) )
|> Seq.iter (fun (n, myp15) ->
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC
let nm = int (if cr < WCRC then cr else cr - WCRC)
if 1UL <<< nm &&& 0x80080080080UL = 0UL then () else //if cm in {7,19,31,43}
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it
let rec tgl c inc = if c < prms.Length then prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc + 30)
tgl (int cbd) myp15)
//run the 3 * x * x - y * y quadratic prime generating sequences - both even/odd and odd/even combos in x's/y's
Seq.initInfinite (uint64 << ((+) 2)) |> Seq.map (fun x -> 2UL * x * x + 2UL * x - 1UL, x - 1UL)
|> Seq.takeWhile (((>=) topRng) << fst) //reverse order ok in the following
|> Seq.collect (fun (st, by) -> Seq.unfold (fun x -> if x + 28UL < by || x < 1UL then None
else if x < 2UL then Some(x, 0UL) else Some(x, x - 2UL)) by
|> Seq.map (fun midy -> st - midy * midy + by * by, int midy - 15)
|> Seq.takeWhile (((>=) topRng) << fst))
|> Seq.iter (fun (n, mym15) ->
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC
let nm = int (if cr < WCRC then cr else cr - WCRC)
if 1UL <<< nm &&& 0x800800000800800UL = 0UL then () else //if cm in {11,23,47,59}
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it
let rec tgl c inc = if inc > -15 && c < prms.Length then
prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc - 30) in tgl (int cbd) mym15)
//run the prime square-free culling operations:
bps |> Seq.takeWhile (((>=) kSqrtLim) << fst) |> Seq.iter (fun (k, i) -> //for each base prime less than limit
let x = uint64 BWPS.[int i] in let p = (uint64 k * WCRC) + x in let sndx = BWPS.Length - 1
let sqr = p * p in let s = sqr - FRSTBP - sqr * uint64 BWPS.[sndx]
{ sndx .. sndx + WHTS - 1 } |> Seq.map (fun i ->
let adj, ni = if i >= WHTS then WCRC, i - WHTS else 0UL, i
let c0 = s + sqr * (uint64 BWPS.[ni] + adj)
let c0d = c0 / WCRC in let c0n = WHLNDXS.[int (c0 - c0d * WCRC)]
(c0d, c0n) )
|> Seq.takeWhile (((>) (prms.Length - 1)) << int << fst) //loop maximum of wheel hits times; run tight cull loop for each
|> Seq.iter (fun (ccd, cm) -> //tight culling loops almost all the time, following faster when 32-bit mode
let cmsk = ~~~(1us <<< int cm)
let rec cullp c = //tight prime square culling loop
if c < uint64 prms.Length then prms.[int c] <- prms.[int c] &&& cmsk; cullp (c + sqr) in cullp ccd) )
let inline testPrm ndx = is_prms.[int (ndx >>> 4)] &&& (1us <<< (int ndx &&& 15)) <> 0us
Seq.initInfinite id |> Seq.collect (fun k -> { 0 .. WHTS - 1 } |> Seq.map (fun i -> k, byte i))
|> Seq.filter (fun (k, i) -> testPrm (WHTS * int k + int i)) |> setprms is_prms //find all primes
//an extremely fast bit counting routine by 16-bit words => number of primes...
//mask off all buffer prime bits above the top number:
let wrd = (topNum - FRSTBP) / WCRC in let wrdn = int WHLNDXS.[int (topNum - FRSTBP - wrd * WCRC)]
let awrd = if topNum < FRSTBP then -1 else
is_prms.[int wrd] <- is_prms.[int wrd] &&& ~~~(uint16 -2 <<< wrdn); int wrd
{ awrd + 1 .. is_prms.Length - 1 } |> Seq.iter (fun w -> is_prms.[w] <- 0us)
let rec count w acc = //tight count loop
if w < is_prms.Length then //magic int16 bit count algorithm - slightly slower than Look Up Table for int16's
let a = is_prms.[w] in let b = a - ((a >>> 1) &&& 0x5555us)
let c = (b &&& 0x3333us) + ((b >>> 2) &&& 0x3333us)
count (w + 1) (acc + uint64 ((((c + (c >>> 4)) &&& 0x0F0Fus) * 0x0101us) >>> 8))
else acc in count 0 (uint64 (BPRMS |> Seq.takeWhile ((>=) topNum) |> Seq.length)) //counts set bits incl. base
// seq { yield! BPRMS |> Seq.takeWhile ((>=) topNum); //very slow but literal way of enumerating resulting primes:
// yield! Seq.initInfinite id
// |> Seq.collect (fun k -> BWPS |> Seq.mapi (fun i x -> WCRC * uint64 k + uint64 x, WHTS * k + i))
// |> Seq.takeWhile (((>=) topNum) << fst)
// |> Seq.filter (testPrm << snd) |> Seq.map (fst) }
open SoA
[<EntryPoint>]
let main argv =
printfn "%s" getName
if argv = null || argv.Length = 0 then
failwith "\r\nno command line argument for limit!!!"
let test = primes 100UL // |> Seq.iter (printf "%A ")
let topNum = System.UInt64.Parse argv.[0]
let strt = System.DateTime.Now.Ticks
// let count = { 0 .. 3999 } |> Seq.map (fun i -> SoA.primes topNum) |> Seq.max // |> Seq.length
let count = primes topNum // |> Seq.length
let elpsd = System.DateTime.Now.Ticks - strt
printfn "Found %A primes up to %A in %A milliseconds." count topNum (elpsd / 10000L)
printf "\r\nPress any key to exit:"
System.Console.ReadKey(true) |> ignore
printfn ""
0 // return an integer exit code
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.