-
-
Save anonymous/d17da4f1a8db2fadf10c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module SoEMWF = | |
let getName = "\r\nUses the Sieve of Eratosthenes to calculate the primes to argument.\r\n" | |
let private WRDBTS, FRSTBP = 16, 23UL | |
let private BPRMS = [| 2UL; 3UL; 5UL; 7UL; 11UL; 13UL; 17UL; 19UL |] | |
let private BWPS = [| //base wheel positions + 23 | |
23uy;29uy;31uy;37uy;41uy;43uy;47uy;53uy; 59uy;61uy;67uy;71uy;73uy;79uy;83uy;89uy; | |
97uy;101uy;103uy;107uy;109uy;113uy;121uy;127uy; 131uy;137uy;139uy;143uy;149uy;151uy;157uy;163uy; | |
167uy;169uy;173uy;179uy;181uy;187uy;191uy;193uy; 197uy;199uy;209uy;211uy;221uy;223uy;227uy;229uy |] | |
let private GAPS = [| //wheel position deltas for each computation of next incremental cull position | |
6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 6uy; 6uy; 2uy; 6uy; 4uy; 2uy; 6uy; 4uy; 6uy; 8uy; | |
4uy; 2uy; 4uy; 2uy; 4uy; 8uy; 6uy; 4uy; 6uy; 2uy; 4uy; 6uy; 2uy; 6uy; 6uy; 4uy; | |
2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 10uy; 2uy; 10uy; 2uy; 4uy; 2uy; 4uy; | |
6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 6uy; 6uy; 2uy; 6uy; 4uy; 2uy; 6uy; 4uy; 6uy; 8uy; //2 rounds for overflow | |
4uy; 2uy; 4uy; 2uy; 4uy; 8uy; 6uy; 4uy; 6uy; 2uy; 4uy; 6uy; 2uy; 6uy; 6uy; 4uy; | |
2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 10uy; 2uy; 10uy; 2uy; 4uy; 2uy; 4uy |] | |
let private WHTS, WCRC = BWPS.Length, (GAPS |> Seq.map uint64 |> Seq.sum) / 2UL //check: 48 and 210UL | |
let private WHLNDXS = //look up table for (position - 23) mod WCRC - 0 to WCRC - 1, to wheel index - 0 to WHTS - 1 | |
(GAPS |> Seq.take WHTS |> Seq.mapi (fun i x -> i, x) | |
|> Seq.collect (fun (i, n) -> seq { for n = 1uy to n do yield byte i })) |> Seq.toArray | |
let primes topNum = | |
if topNum >= ((1UL <<< 30) / 3UL - 1UL) * uint64 WCRC + FRSTBP then failwith "Argument too large for algorithm!!!" | |
let BFSZ = if topNum < FRSTBP then 6 else ((int ((topNum - FRSTBP) / uint64 WCRC) + 2) * 3) //includes topNum | |
let cull (cmpsts: _ array) (bps: _ seq) = | |
let topNdx = uint64 (cmpsts.Length * WRDBTS - 1) | |
let topRng = topNdx / uint64 WHTS * WCRC + uint64 BWPS.[WHTS - 1] //from size of current array | |
let kSqrtLim = int (sqrt (float (topRng - WCRC))) / int WCRC //base prime test limit value comp for extra | |
bps |> Seq.takeWhile (((>=) kSqrtLim) << fst) |> Seq.iter (fun (k, i) -> //for each base prime less than limit | |
let x = uint64 BWPS.[int i] in let p = (uint64 k * WCRC) + x | |
let s = p * p - FRSTBP - x * p in let pm = 3 * int p | |
{ int i .. int i + WHTS - 1 } |> Seq.map (fun i -> | |
let adj, ni = if i >= WHTS then WCRC, i - WHTS else 0UL, i | |
let c0 = s + p * (uint64 BWPS.[ni] + adj) | |
let c0d = c0 / WCRC in let c0n = WHLNDXS.[int (c0 - c0d * WCRC)] | |
let c0dwtry = 3 * int c0d | |
if c0n < 16uy then c0dwtry, c0n else if c0n < 32uy then c0dwtry + 1, c0n - 16uy | |
else c0dwtry + 2, c0n - 32uy ) | |
|> Seq.takeWhile (((>) cmpsts.Length) << fst) //loop maximum of wheel hits times; run tight cull loop for each | |
|> Seq.iter (fun (ccd, cm) -> //tight culling loops almost all the time, following faster when 32-bit mode | |
let cmsk = 1us <<< int cm | |
let inline setCmpst i = cmpsts.[i] <- cmpsts.[i] ||| cmsk | |
let rec cullp c = | |
if c < cmpsts.Length then setCmpst c; cullp (c + pm) in cullp ccd) ) //fast for 64 bit mode | |
let cmpsts = //first generate the small master pre-culled pattern array | |
let mstr = Array.zeroCreate (11 * 13 * 17 * 19 * WHTS / WRDBTS) | |
{ WHTS - 4 .. WHTS - 1 } |> Seq.map (fun m -> (-1, byte m)) |> cull mstr //cull 11/13/17/19 | |
let arr = Array.zeroCreate BFSZ | |
{ 0 .. mstr.Length .. (arr.Length - 1) } //copy repetitions of pre-cull pattern array until full | |
|> Seq.iter (fun i -> let len = if i + mstr.Length <= arr.Length | |
then mstr.Length else arr.Length - i | |
System.Array.Copy(mstr, 0, arr, i, len)) | |
arr //has an slight excess of up to two wheels to come out even in int's/wheels | |
let inline testCmptst ndx = cmpsts.[ndx >>> 4] &&& (1us <<< (ndx &&& 15)) <> 0us | |
Seq.initInfinite id |> Seq.collect (fun k -> { 0 .. WHTS - 1 } |> Seq.map (fun i -> k, byte i)) | |
|> Seq.filter (fun (k, i) -> not (testCmptst (WHTS * k + int i))) |> cull cmpsts //cull all composites | |
//an extremely fast bit counting routine by 16-bit words... | |
//mask off all buffer composite bits above the top number: | |
let topd = (topNum - FRSTBP) / WCRC in let topn = int WHLNDXS.[int (topNum - FRSTBP - topd * WCRC)] | |
let trywrd = int topd * 3 | |
let wrd, wrdn = if topn < 16 then trywrd, topn else if topn < 32 then trywrd + 1, topn - 16 | |
else trywrd + 2, topn - 32 | |
let awrd = if topNum < FRSTBP then -1 else cmpsts.[wrd] <- cmpsts.[wrd] ||| (uint16 -2 <<< wrdn); wrd | |
{ awrd + 1 .. cmpsts.Length - 1 } |> Seq.iter (fun w -> cmpsts.[w] <- uint16 -1) | |
let rec count w acc = //tight count loop | |
if w < cmpsts.Length then //magic int16 bit count algorithm - slightly slower than Look Up Table for int16's | |
let a = cmpsts.[w] in let b = a - ((a >>> 1) &&& 0x5555us) | |
let c = (b &&& 0x3333us) + ((b >>> 2) &&& 0x3333us) | |
count (w + 1) (acc - uint64 ((((c + (c >>> 4)) &&& 0x0F0Fus) * 0x0101us) >>> 8)) | |
else acc in count 0 (uint64 (BPRMS |> Seq.takeWhile ((>=) topNum) |> Seq.length)) + //include base primes | |
uint64 cmpsts.Length * uint64 WRDBTS //counts bits not set | |
// seq { yield! BPRMS; |> Seq.takeWhile ((>=) topNum) //very slow but literal way of enumerating resulting primes: | |
// yield! Seq.initInfinite id | |
// |> Seq.collect (fun k -> BWPS |> Seq.mapi (fun i x -> WCRC * uint64 k + uint64 x, WHTS * k + i)) | |
// |> Seq.takeWhile (((>=) topNum) << fst) | |
// |> Seq.filter (not << testCmptst << snd) |> Seq.map (fst) } | |
module SoA = | |
let getName = "\r\nUses the Sieve of Atkin to calculate the primes to argument.\r\n" | |
let private WRDBTS, FRSTBP = 16, 7UL | |
let private BPRMS = [| 2UL; 3UL; 5UL |] | |
let private BWPS = [| 7uy;11uy;13uy;17uy;19uy;23uy;29uy;31uy; //base wheel positions + 7 | |
37uy;41uy;43uy;47uy;49uy;53uy;59uy;61uy |] | |
let private GAPS = [| //wheel position deltas for each computation of next incremental cull position | |
4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; //2 times for overflow | |
4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy; 4uy; 2uy; 4uy; 2uy; 4uy; 6uy; 2uy; 6uy |] | |
let private WHTS, WCRC = BWPS.Length, (GAPS |> Seq.map uint64 |> Seq.sum) / 2UL //check: 16 and 60UL | |
let private WHLNDXS = //look up table for (position - 23) mod WCRC - 0 to WCRC - 1, to wheel index - 0 to WHTS - 1 | |
(GAPS |> Seq.take WHTS |> Seq.mapi (fun i x -> i, x) | |
|> Seq.collect (fun (i, n) -> seq { for n = 1uy to n do yield byte i })) |> Seq.toArray | |
let primes topNum = | |
if topNum >= ((1UL <<< 30) - 1UL) * uint64 WCRC + FRSTBP then failwith "Argument too large for algorithm!!!" | |
let BFSZ = if topNum < FRSTBP then 2 else int ((topNum - FRSTBP) / WCRC) + 2 //includes topNum with spare | |
let is_prms = //initialize the prime sieving array | |
Array.zeroCreate BFSZ //has an slight excess of the rest of the last wheel to come out even in int's/wheels | |
let setprms (prms: _ array) (bps: _ seq) = | |
let topNdx = uint64 (prms.Length * WRDBTS - 1) | |
let topRng = topNdx / uint64 WHTS * WCRC + uint64 BWPS.[WHTS - 1] //maximum value of current array | |
let kSqrtLim = int (sqrt (float (topRng - WCRC))) / int WCRC //compensate for extra wheel | |
//run the 4 * x * x + y * y quadratic prime generating sequences = all x's and odd y's | |
Seq.initInfinite (uint64 << ((+) 1)) |> Seq.map (fun x -> 4UL * x * x + 1UL) | |
|> Seq.takeWhile ((>=) topRng) | |
|> Seq.collect (fun st -> { 1UL .. 2UL .. 29UL } |> Seq.map (fun midy -> st + midy * midy - 1UL, int midy + 15) | |
|> Seq.takeWhile (((>=) topRng) << fst) ) | |
|> Seq.iter (fun (n, myp15) -> | |
if n < FRSTBP then () else //no action for n = 5 - not one of the modulos for this quadratic | |
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC | |
let nm = int (if cr < WCRC then cr else cr - WCRC) | |
if 1UL <<< nm &&& 0x22022020022002UL = 0UL then () else //if nm in {1,13,17,29,37,41,49,53} | |
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it | |
let rec tgl c inc = if c < prms.Length then prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc + 30) | |
tgl (int cbd) myp15) | |
//run the 3 * x * x + y * y quadratic prime generating sequences - odd x's and even y's | |
Seq.initInfinite (uint64 << ((+) 1) << ((*) 2)) |> Seq.map (fun x -> 3UL * x * x + 4UL) | |
|> Seq.takeWhile ((>=) topRng) | |
|> Seq.collect (fun st -> { 2UL .. 2UL .. 30UL } |> Seq.map (fun midy -> st + midy * midy - 4UL, int midy + 15) | |
|> Seq.takeWhile (((>=) topRng) << fst) ) | |
|> Seq.iter (fun (n, myp15) -> | |
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC | |
let nm = int (if cr < WCRC then cr else cr - WCRC) | |
if 1UL <<< nm &&& 0x80080080080UL = 0UL then () else //if cm in {7,19,31,43} | |
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it | |
let rec tgl c inc = if c < prms.Length then prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc + 30) | |
tgl (int cbd) myp15) | |
//run the 3 * x * x - y * y quadratic prime generating sequences - both even/odd and odd/even combos in x's/y's | |
Seq.initInfinite (uint64 << ((+) 2)) |> Seq.map (fun x -> 2UL * x * x + 2UL * x - 1UL, x - 1UL) | |
|> Seq.takeWhile (((>=) topRng) << fst) //reverse order ok in the following | |
|> Seq.collect (fun (st, by) -> Seq.unfold (fun x -> if x + 28UL < by || x < 1UL then None | |
else if x < 2UL then Some(x, 0UL) else Some(x, x - 2UL)) by | |
|> Seq.map (fun midy -> st - midy * midy + by * by, int midy - 15) | |
|> Seq.takeWhile (((>=) topRng) << fst)) | |
|> Seq.iter (fun (n, mym15) -> | |
let cbd = (n - FRSTBP) / WCRC in let cr = n - cbd * WCRC | |
let nm = int (if cr < WCRC then cr else cr - WCRC) | |
if 1UL <<< nm &&& 0x800800000800800UL = 0UL then () else //if cm in {11,23,47,59} | |
let cm = 1us <<< (int WHLNDXS.[int (cr - FRSTBP)]) //following is the tight loop that does it | |
let rec tgl c inc = if inc > -15 && c < prms.Length then | |
prms.[c] <- prms.[c] ^^^ cm; tgl (c + inc) (inc - 30) in tgl (int cbd) mym15) | |
//run the prime square-free culling operations: | |
bps |> Seq.takeWhile (((>=) kSqrtLim) << fst) |> Seq.iter (fun (k, i) -> //for each base prime less than limit | |
let x = uint64 BWPS.[int i] in let p = (uint64 k * WCRC) + x in let sndx = BWPS.Length - 1 | |
let sqr = p * p in let s = sqr - FRSTBP - sqr * uint64 BWPS.[sndx] | |
{ sndx .. sndx + WHTS - 1 } |> Seq.map (fun i -> | |
let adj, ni = if i >= WHTS then WCRC, i - WHTS else 0UL, i | |
let c0 = s + sqr * (uint64 BWPS.[ni] + adj) | |
let c0d = c0 / WCRC in let c0n = WHLNDXS.[int (c0 - c0d * WCRC)] | |
(c0d, c0n) ) | |
|> Seq.takeWhile (((>) (prms.Length - 1)) << int << fst) //loop maximum of wheel hits times; run tight cull loop for each | |
|> Seq.iter (fun (ccd, cm) -> //tight culling loops almost all the time, following faster when 32-bit mode | |
let cmsk = ~~~(1us <<< int cm) | |
let rec cullp c = //tight prime square culling loop | |
if c < uint64 prms.Length then prms.[int c] <- prms.[int c] &&& cmsk; cullp (c + sqr) in cullp ccd) ) | |
let inline testPrm ndx = is_prms.[int (ndx >>> 4)] &&& (1us <<< (int ndx &&& 15)) <> 0us | |
Seq.initInfinite id |> Seq.collect (fun k -> { 0 .. WHTS - 1 } |> Seq.map (fun i -> k, byte i)) | |
|> Seq.filter (fun (k, i) -> testPrm (WHTS * int k + int i)) |> setprms is_prms //find all primes | |
//an extremely fast bit counting routine by 16-bit words => number of primes... | |
//mask off all buffer prime bits above the top number: | |
let wrd = (topNum - FRSTBP) / WCRC in let wrdn = int WHLNDXS.[int (topNum - FRSTBP - wrd * WCRC)] | |
let awrd = if topNum < FRSTBP then -1 else | |
is_prms.[int wrd] <- is_prms.[int wrd] &&& ~~~(uint16 -2 <<< wrdn); int wrd | |
{ awrd + 1 .. is_prms.Length - 1 } |> Seq.iter (fun w -> is_prms.[w] <- 0us) | |
let rec count w acc = //tight count loop | |
if w < is_prms.Length then //magic int16 bit count algorithm - slightly slower than Look Up Table for int16's | |
let a = is_prms.[w] in let b = a - ((a >>> 1) &&& 0x5555us) | |
let c = (b &&& 0x3333us) + ((b >>> 2) &&& 0x3333us) | |
count (w + 1) (acc + uint64 ((((c + (c >>> 4)) &&& 0x0F0Fus) * 0x0101us) >>> 8)) | |
else acc in count 0 (uint64 (BPRMS |> Seq.takeWhile ((>=) topNum) |> Seq.length)) //counts set bits incl. base | |
// seq { yield! BPRMS |> Seq.takeWhile ((>=) topNum); //very slow but literal way of enumerating resulting primes: | |
// yield! Seq.initInfinite id | |
// |> Seq.collect (fun k -> BWPS |> Seq.mapi (fun i x -> WCRC * uint64 k + uint64 x, WHTS * k + i)) | |
// |> Seq.takeWhile (((>=) topNum) << fst) | |
// |> Seq.filter (testPrm << snd) |> Seq.map (fst) } | |
open SoA | |
[<EntryPoint>] | |
let main argv = | |
printfn "%s" getName | |
if argv = null || argv.Length = 0 then | |
failwith "\r\nno command line argument for limit!!!" | |
let test = primes 100UL // |> Seq.iter (printf "%A ") | |
let topNum = System.UInt64.Parse argv.[0] | |
let strt = System.DateTime.Now.Ticks | |
// let count = { 0 .. 3999 } |> Seq.map (fun i -> SoA.primes topNum) |> Seq.max // |> Seq.length | |
let count = primes topNum // |> Seq.length | |
let elpsd = System.DateTime.Now.Ticks - strt | |
printfn "Found %A primes up to %A in %A milliseconds." count topNum (elpsd / 10000L) | |
printf "\r\nPress any key to exit:" | |
System.Console.ReadKey(true) |> ignore | |
printfn "" | |
0 // return an integer exit code |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment