Skip to content

Instantly share code, notes, and snippets.

@Hermann-SW
Created June 3, 2021 09:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hermann-SW/a2d7c85d151aa704c1fe2ff19b0ccb15 to your computer and use it in GitHub Desktop.
Save Hermann-SW/a2d7c85d151aa704c1fe2ff19b0ccb15 to your computer and use it in GitHub Desktop.
First version of unit256_t "mod_256()" showing speedup over gmplib "mpz_mod()"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <time.h>
#include <sys/time.h>
#include "gmp.h"
#define N 1000000
typedef __uint128_t uint256_t[2];
void set_256(uint256_t d, __uint128_t l, __uint128_t h) { d[0]=l; d[1]=h; }
inline void add_128(uint256_t d, uint256_t x, __uint128_t a)
{
d[0] = x[0] + a;
d[1] = (d[0] < a) ? x[1]+1 : x[1];
}
void add_256(uint256_t d, uint256_t x, uint256_t a)
{
add_128(d, x, a[0]);
d[1] += a[1];
}
void q(long l) { printf("%016lx ", l); }
void p(long l) { printf("%lx", l); }
void P(__uint128_t i) { if (i>UINT64_MAX) p(i>>64); q(i & 0xFFFFFFFFFFFFFFFF); }
void _P_256(uint256_t x) { if (x[1]>0) P(x[1]); P(x[0]); }
#define P_256(l) { printf("%s: ", #l); _P_256(l); }
#define min(x, y) ((x<y) ? (x) : (y))
int clz(__uint128_t u)
{
unsigned long long h = ((unsigned long long *)&u)[1];
return (h!=0) ? __builtin_clzll(h) : 64 + __builtin_clzll(u);
}
__uint128_t mod_256(uint256_t x, __uint128_t n)
{
if (x[1] == 0) return x[0] % n;
else
{
__uint128_t r = x[1] % n;
int F = clz(n);
int R = clz(r);
for(int i=0; i<128; ++i)
{
if (R>F+1)
{
int h = min(R-(F+1), 128-i);
r <<= h; R-=h; i+=(h-1); continue;
}
r <<= 1; if (r >= n) { r -= n; R=clz(r); }
}
r += (x[0] % n); if (r >= n) r -= n;
return r;
}
}
int main() {
struct timespec t0, t1;
uint256_t s, a;
__uint128_t b, n;
set_256(a, 0x3000000010000000L, 0x7000000050000000L);
n = (1L<<61) - 1;
n *= n;
set_256(s, 0, 0);
clock_gettime(CLOCK_REALTIME, &t0);
for(int i=0; i<N; ++i)
{
b = mod_256(a, n);
add_128(s, s, b);
}
clock_gettime(CLOCK_REALTIME, &t1);
printf("%ldns(1)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec);
P_256(s); puts("");
{
mpz_t a,s,t,n;
mpz_inits(n, s, NULL);
mpz_init_set_ui(a, 1);
mpz_mul_2exp(n, a, 61);
mpz_sub_ui(a, n, 1);
mpz_mul(n, a, a); // (2^61-1)^2
mpz_init_set_ui(t, 0x7000000050000000L);
mpz_mul_2exp(a, t, 128);
mpz_add_ui(t, a, 0x3000000010000000L);
mpz_init_set_ui(s, 0);
clock_gettime(CLOCK_REALTIME, &t0);
for(int i=0; i<N; ++i)
{
mpz_mod(a, t, n);
mpz_add(s, s, a);
}
clock_gettime(CLOCK_REALTIME, &t1);
printf("%ldns(2)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec);
gmp_printf ("%Zx\n", s);
}
exit (0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment