Last active
June 7, 2021 02:36
-
-
Save Hermann-SW/83c8ab9e10a0bb64d770af543ed08445 to your computer and use it in GitHub Desktop.
compares runtimes of 1 million additions of squares of 128bit numbers (my uint256_t / gmplib / ttmath)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
$ g++ -Ittmath -O3 -Wall -Wextra -pedantic -fomit-frame-pointer -m64 -mtune=skylake -march=broadwell -o sqr sqr.cpp -lgmp | |
$ for((i=0; i<5; ++i)); do ./sqr ; done | grep ns | sort -n | |
2786120ns(1) | |
2955138ns(1) | |
3052486ns(1) | |
3305607ns(1) | |
6627298ns(1) | |
14803914ns(3) | |
15377249ns(3) | |
15396184ns(3) | |
15749061ns(3) | |
16054087ns(2) | |
16415241ns(2) | |
16507217ns(2) | |
16900889ns(2) | |
17525356ns(3) | |
24720449ns(2) | |
$ | |
*/ | |
#include <ttmath/ttmath.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <inttypes.h> | |
#include <assert.h> | |
#include <time.h> | |
#include <sys/time.h> | |
#include "gmp.h" | |
#define N 1000000 | |
#define UINT128_C(u) ((__uint128_t)u) | |
#define UINT128(h, l) (UINT128_C(h)<<64 | l) | |
void pu64(__uint64_t u) { printf("%" PRIu64, u); } | |
void pu640(__uint64_t u) { printf("%019" PRIu64, u); } | |
#define D19_ UINT64_C(10000000000000000000) | |
const __uint128_t d19_ = D19_; | |
const __uint128_t d38_ = (UINT128_C(D19_)*D19_); | |
const __uint128_t UINT128_MAX = UINT128(UINT64_MAX, UINT64_MAX); | |
void pu128(__uint128_t u) | |
{ | |
if (u < d19_) pu64(u); | |
else if (u < d38_) { pu64(u/d19_); pu640(u%d19_); } | |
else { pu64(u/d38_); u%=d38_; pu640(u/d19_); pu640(u%d19_); } | |
} | |
typedef __uint128_t uint256_t[2]; | |
#define UINT256(h, l) { l, h } | |
const uint256_t UINT256_MAX = { UINT128_MAX, UINT128_MAX }; | |
void q(long l) { printf("%016lX ", l); } | |
void p(long l) { printf("%lX", l); } | |
void P(__uint128_t i) { if (i>UINT64_MAX) p(i>>64); q(i & 0xFFFFFFFFFFFFFFFF); } | |
void _P_256(const uint256_t x) { if (x[1]>0) P(x[1]); P(x[0]); } | |
#define P_256(l) { printf("%s: ", #l); _P_256(l); } | |
void set_256_(uint256_t d, const uint256_t s) { d[0]=s[0]; d[1]=s[1]; } | |
void set_256(uint256_t d, __uint128_t l, __uint128_t h) { d[0]=l; d[1]=h; } | |
void add_128(uint256_t d, uint256_t x, __uint128_t a) | |
{ | |
d[0] = x[0] + a; | |
d[1] = (d[0] < a) ? x[1]+1 : x[1]; | |
} | |
void add_256(uint256_t d, uint256_t x, const uint256_t a) | |
{ | |
add_128(d, x, a[0]); | |
d[1] += a[1]; | |
} | |
void sub_128(uint256_t d, uint256_t x, __uint128_t a) | |
{ | |
if (x[0] < a) x[1] -= 1; | |
d[0] = x[0] - a; | |
} | |
void sub_256(uint256_t d, uint256_t x, const uint256_t a) | |
{ | |
sub_128(d, x, a[0]); | |
d[1] -= a[1]; | |
} | |
int lt_256(const uint256_t a, const uint256_t b) | |
{ | |
return (a[1]<b[1]) || ((a[1]==b[1]) && (a[0]<b[0])); | |
} | |
int le_256(const uint256_t a, const uint256_t b) | |
{ | |
return (a[1]<b[1]) || ((a[1]==b[1]) && (a[0]<=b[0])); | |
} | |
void shl_256(uint256_t d, long s) | |
{ | |
d[1] <<= s; | |
d[1] += (d[0] >> (128-s)); | |
d[0] <<= s; | |
} | |
void mul10(uint256_t d, uint256_t x) | |
{ | |
uint256_t t = { x[0], x[1] }; | |
shl_256(t, 2); | |
add_256(d, x, t); | |
shl_256(d, 1); | |
} | |
const uint256_t UINT256_MAX_10th = UINT256( UINT128(0x1999999999999999, 0x9999999999999999), UINT128(0x9999999999999999, 0x999999999999999A) ); | |
void pu256_(uint256_t v, uint256_t t, const uint256_t o) | |
{ | |
if (!lt_256(v, t) && le_256(o, UINT256_MAX_10th)) | |
{ | |
uint256_t nt, no = { t[0], t[1] }; | |
mul10(nt, t); | |
pu256_(v, nt, no); | |
} | |
char d = '0'; | |
while (le_256(o, v)) | |
{ | |
sub_256(v, v, o); | |
++d; | |
} | |
putchar(d); | |
} | |
void pu256(const uint256_t u) | |
{ | |
if ((u[1]==0) && (u[0]==0)) putchar('0'); | |
else | |
{ | |
uint256_t v = { u[0], u[1] }, t = UINT256( 0, 10 ), o = UINT256( 0, 1 ); | |
pu256_(v, t, o); | |
} | |
} | |
void sqr_128(uint256_t d, const __uint128_t x) | |
{ | |
if (x <= UINT64_MAX) | |
{ | |
d[0] = x*x; | |
d[1] = 0; | |
} | |
else | |
{ | |
__uint128_t l = x & UINT64_MAX; | |
__uint128_t h = x >> 64; | |
uint256_t yz, w; | |
set_256(d, l * l, 0); | |
set_256(yz, l * h, 0); | |
shl_256(yz, 64+1); | |
set_256(w, 0, h * h); | |
add_256(d, d, yz); | |
add_256(d, d, w); | |
} | |
} | |
int main(int argc, char *argv[] __attribute__((unused))) { | |
if (argc>1) { pu256(UINT256_MAX); puts(""); return 0; } | |
struct timespec t0, t1; | |
uint256_t s, b; | |
volatile __uint128_t a = 0x0006000005000000L; | |
a *= a; | |
set_256(s, 0, 0); | |
clock_gettime(CLOCK_REALTIME, &t0); | |
for(int i=0; i<N; ++i) | |
{ | |
sqr_128(b, a); | |
add_256(s, s, b); | |
} | |
clock_gettime(CLOCK_REALTIME, &t1); | |
printf("%ldns(1)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec); | |
P_256(s); puts(""); | |
pu256(s); puts(""); | |
{ | |
mpz_t a,s,t; | |
mpz_init(a); | |
mpz_init_set_si(s, 0); | |
mpz_init_set_si(t, 0x0006000050000000L); | |
mpz_mul(a, t, t); | |
clock_gettime(CLOCK_REALTIME, &t0); | |
for(int i=0; i<N; ++i) | |
{ | |
mpz_mul(t, a, a); | |
mpz_add(s, s, t); | |
} | |
clock_gettime(CLOCK_REALTIME, &t1); | |
printf("%ldns(2)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec); | |
gmp_printf ("%ZX\n", s); | |
gmp_printf ("%Zd\n", s); | |
} | |
{ | |
// a,b are from <0, 2^256 - 1> | |
ttmath::UInt<4> s; | |
ttmath::UInt<4> a; | |
a = 0x0006000050000000L; | |
a *= a; | |
s = 0; | |
clock_gettime(CLOCK_REALTIME, &t0); | |
for(int i=0; i<N; ++i) | |
s += a*a; | |
clock_gettime(CLOCK_REALTIME, &t1); | |
printf("%ldns(3)\n", (t1.tv_sec-t0.tv_sec) * 1000000000 + t1.tv_nsec - t0.tv_nsec); | |
std::cout << s << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I added const declarations for function args where possible.
New function "pu256()" for printing 256bit number decimally is discussed here:
https://stackoverflow.com/a/67865257/5674289
Calling "sqr" without args gives the benchmarking output for uint256_t/gmplib/ttmath: