Created
June 3, 2021 09:39
-
-
Save Hermann-SW/a2d7c85d151aa704c1fe2ff19b0ccb15 to your computer and use it in GitHub Desktop.
First version of unit256_t "mod_256()" showing speedup over gmplib "mpz_mod()"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <assert.h> | |
#include <time.h> | |
#include <sys/time.h> | |
#include "gmp.h" | |
#define N 1000000 | |
typedef __uint128_t uint256_t[2]; | |
void set_256(uint256_t d, __uint128_t l, __uint128_t h) { d[0]=l; d[1]=h; } | |
inline void add_128(uint256_t d, uint256_t x, __uint128_t a) | |
{ | |
d[0] = x[0] + a; | |
d[1] = (d[0] < a) ? x[1]+1 : x[1]; | |
} | |
void add_256(uint256_t d, uint256_t x, uint256_t a) | |
{ | |
add_128(d, x, a[0]); | |
d[1] += a[1]; | |
} | |
void q(long l) { printf("%016lx ", l); } | |
void p(long l) { printf("%lx", l); } | |
void P(__uint128_t i) { if (i>UINT64_MAX) p(i>>64); q(i & 0xFFFFFFFFFFFFFFFF); } | |
void _P_256(uint256_t x) { if (x[1]>0) P(x[1]); P(x[0]); } | |
#define P_256(l) { printf("%s: ", #l); _P_256(l); } | |
#define min(x, y) ((x<y) ? (x) : (y)) | |
int clz(__uint128_t u) | |
{ | |
unsigned long long h = ((unsigned long long *)&u)[1]; | |
return (h!=0) ? __builtin_clzll(h) : 64 + __builtin_clzll(u); | |
} | |
__uint128_t mod_256(uint256_t x, __uint128_t n) | |
{ | |
if (x[1] == 0) return x[0] % n; | |
else | |
{ | |
__uint128_t r = x[1] % n; | |
int F = clz(n); | |
int R = clz(r); | |
for(int i=0; i<128; ++i) | |
{ | |
if (R>F+1) | |
{ | |
int h = min(R-(F+1), 128-i); | |
r <<= h; R-=h; i+=(h-1); continue; | |
} | |
r <<= 1; if (r >= n) { r -= n; R=clz(r); } | |
} | |
r += (x[0] % n); if (r >= n) r -= n; | |
return r; | |
} | |
} | |
int main() { | |
struct timespec t0, t1; | |
uint256_t s, a; | |
__uint128_t b, n; | |
set_256(a, 0x3000000010000000L, 0x7000000050000000L); | |
n = (1L<<61) - 1; | |
n *= n; | |
set_256(s, 0, 0); | |
clock_gettime(CLOCK_REALTIME, &t0); | |
for(int i=0; i<N; ++i) | |
{ | |
b = mod_256(a, n); | |
add_128(s, s, b); | |
} | |
clock_gettime(CLOCK_REALTIME, &t1); | |
printf("%ldns(1)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec); | |
P_256(s); puts(""); | |
{ | |
mpz_t a,s,t,n; | |
mpz_inits(n, s, NULL); | |
mpz_init_set_ui(a, 1); | |
mpz_mul_2exp(n, a, 61); | |
mpz_sub_ui(a, n, 1); | |
mpz_mul(n, a, a); // (2^61-1)^2 | |
mpz_init_set_ui(t, 0x7000000050000000L); | |
mpz_mul_2exp(a, t, 128); | |
mpz_add_ui(t, a, 0x3000000010000000L); | |
mpz_init_set_ui(s, 0); | |
clock_gettime(CLOCK_REALTIME, &t0); | |
for(int i=0; i<N; ++i) | |
{ | |
mpz_mod(a, t, n); | |
mpz_add(s, s, a); | |
} | |
clock_gettime(CLOCK_REALTIME, &t1); | |
printf("%ldns(2)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec); | |
gmp_printf ("%Zx\n", s); | |
} | |
exit (0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.raspberrypi.org/forums/viewtopic.php?f=33&t=311893&p=1873040#p1873040