Last active
April 8, 2020 09:18
-
-
Save ykm11/c71a49ba92eb4e43cf4047d7d8bad6e7 to your computer and use it in GitHub Desktop.
メルセンヌ素数2^{521}-1の逆元計算の速度比較
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
mpn_copyi(r, x, SIZE); // r <- x | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^4 | |
for (size_t i = 0; i < 518; i++) { | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- b^2 | |
} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0, 0}; | |
mp_limb_t b[SIZE] = {0}; | |
const int N = 1000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert1: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert2: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] invert1: 84.242000 usec | |
[+] invert2: 115.085000 usec | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
mpn_copyi(r, x, SIZE); // r <- x | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_mul_n(tmp, b, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{3} | |
for (size_t i = 0; i < 518/2; i++) { | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- r^{4} | |
mpn_mul_n(tmp, r, b, SIZE); // r <- r * x^{3} | |
mod(r, tmp, p, tp, sp); | |
} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0, 0}; | |
mp_limb_t b[SIZE] = {0}; | |
const int N = 1000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert1: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert2: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] invert1: 82.291000 usec | |
[+] invert2: 83.924000 usec | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{2} | |
mpn_sqr(tmp, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{4} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{6} | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{7} | |
mpn_copyi(b, (const mp_limb_t*)r, SIZE); // b <- x^{7} | |
for (size_t i = 0; i < 516/3; i++) { | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- r^{8} | |
mpn_mul_n(tmp, r, b, SIZE); // r <- r * x^{7} | |
mod(r, tmp, p, tp, sp); | |
} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0, 0}; | |
mp_limb_t b[SIZE] = {0}; | |
const int N = 1000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert1: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert2: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] invert1: 67.267000 usec | |
[+] invert2: 71.901000 usec | |
""" | |
# for ((i=0; i<10; i++)); do ./a.out; done | |
# で繰り返すとmpn_invertは速くなっていってた CPUが寝てたのかな |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_zero(t, SIZE*2); | |
mpn_zero(s, SIZE*2); | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
#if 0 | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
#else | |
for (size_t i = 0; i < SIZE; i++) { | |
s[i+8] = t[i]; | |
} | |
mpn_lshift(s, (const mp_limb_t*)s, SIZE*2, 9); | |
mpn_sub_n(s, (const mp_limb_t*)s, (const mp_limb_t*)t, SIZE*2); // (T mod R)*(N+1) - (T mod R) | |
#endif | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{2} | |
mpn_mul_n(tmp, b, x, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{3} | |
mpn_sqr(tmp, r, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{12} | |
mpn_mul_n(tmp, b, r, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{15} | |
mpn_copyi(r, (const mp_limb_t*)b, SIZE); // r <- x^{15} | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{30} | |
mpn_mul_n(tmp, b, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{31} | |
for (size_t i = 0; i < 515/5; i++) { | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // 1 | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // 2 | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // 3 | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // 4 | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- r^{32} | |
mpn_mul_n(tmp, r, b, SIZE); // r <- r * x^{31} | |
mod(r, tmp, p, tp, sp); | |
} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0, 0}; | |
mp_limb_t b[SIZE] = {0}; | |
const int N = 1000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert1: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert2: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] invert1: 67.029000 usec | |
[+] invert2: 58.144000 usec | |
""" | |
# modにmul使ってたの見落としてた. 変えたら圧倒的勝利できた. | |
# たぶん3bitsでもいい勝負になる |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_zero(t, SIZE*2); | |
mpn_zero(s, SIZE*2); | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
#if 0 | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
#else | |
for (size_t i = 0; i < SIZE; i++) { | |
s[i+8] = t[i]; | |
} | |
mpn_lshift(s, (const mp_limb_t*)s, SIZE*2, 9); | |
mpn_sub_n(s, (const mp_limb_t*)s, (const mp_limb_t*)t, SIZE*2); // (T mod R)*(N+1) - (T mod R) | |
#endif | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{2} | |
mpn_mul_n(tmp, b, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{3} | |
mpn_sqr(tmp, b, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{12} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{15} | |
mpn_sqr(tmp, r, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{240} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{255} | |
mpn_sqr(tmp, r, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{65280} | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{130560} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{510} | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{511} | |
mpn_mul_n(tmp, b, r, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{131071} | |
for (size_t i = 0; i < (519-9)/17; i++) { | |
for (size_t t = 0; t < 17; t++) { | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- | |
} | |
mpn_mul_n(tmp, r, b, SIZE); // r <- r * x^{131071} | |
mod(r, tmp, p, tp, sp); | |
} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0, 0}; | |
mp_limb_t b[SIZE] = {0}; | |
const int N = 10000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert1: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert2: %f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] mpn_sec_invert: 62.686600 usec | |
[+] mpz_invert: 1.582700 usec | |
[+] invert: 50.712900 usec | |
""" | |
# mpz_invertも追加してみた. 速すぎる. | |
# 自分で実装するmpn_invertは多分これが一番速いんじゃないかなぁ.. | |
# もっと速くするならmulMod, sqrModを速くするとよい. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <time.h> | |
#include <gmpxx.h> | |
#define SIZE 9 | |
static void set_mpz_t(mpz_t& z, const uint64_t* p, int n) { | |
int s = n; | |
while (s > 0) { | |
if (p[s - 1]) break; | |
s--; | |
} | |
z->_mp_alloc = n; | |
z->_mp_size = s; | |
z->_mp_d = (mp_limb_t*)const_cast<uint64_t*>(p); | |
} | |
void dump(const uint64_t *p, int n) { | |
mpz_t t; | |
set_mpz_t(t, p, n); | |
std::cout << t << std::endl; | |
} | |
void dump(const uint64_t *p) { | |
mpz_t t; | |
set_mpz_t(t, p, SIZE); | |
std::cout << t << std::endl; | |
} | |
static inline void mod(mp_limb_t *z, const mp_limb_t *XY, const mp_limb_t *p, mp_limb_t *t, mp_limb_t *s) { | |
// (T + (T mod R)*N) / R | |
mpn_zero(t, SIZE*2); | |
mpn_zero(s, SIZE*2); | |
mpn_and_n(t, XY, p, SIZE); // T mod R | |
#if 0 | |
mpn_mul_n(s, (const mp_limb_t*)t, p, SIZE); | |
#else | |
for (size_t i = 0; i < SIZE; i++) { | |
s[i+8] = t[i]; | |
} | |
mpn_lshift(s, (const mp_limb_t*)s, SIZE*2, 9); | |
mpn_sub_n(s, (const mp_limb_t*)s, (const mp_limb_t*)t, SIZE*2); // (T mod R)*(N+1) - (T mod R) | |
#endif | |
mpn_add_n(t, (const mp_limb_t*)s, XY, SIZE*2); // (T + (T mod R)*N) | |
mpn_rshift(t, (const mp_limb_t*)t, SIZE*2, 9); | |
for (size_t i = 0; i < SIZE; i++) { // (T + (T mod R)*N) / R | |
t[i] = t[i+8]; | |
} | |
if (mpn_cmp((const mp_limb_t*)t, p, SIZE) >= 0) { | |
mpn_sub_n(t, (const mp_limb_t*)t, p, SIZE); | |
} | |
mpn_copyi(z, (const mp_limb_t*)t, SIZE); | |
} | |
void invert1(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
mp_limb_t tp[mpn_sec_invert_itch(SIZE)]; | |
mp_limb_t ap[SIZE]; | |
mpn_copyi(ap, x, SIZE); | |
//mpn_sec_invert(r, ap, p, SIZE, 2*SIZE*GMP_NUMB_BITS, tp); | |
mpn_sec_invert(r, ap, p, SIZE, 521*2, tp); | |
} | |
void invert2(mp_limb_t *r, const mp_limb_t* x, const mp_limb_t* p) { | |
#if 0 | |
mp_limb_t tp[SIZE*2]; | |
mp_limb_t sp[SIZE*2]; | |
mp_limb_t tmp[SIZE*2]; | |
mp_limb_t b[SIZE]; | |
#else | |
mp_limb_t *tp = new mp_limb_t[SIZE*2]; | |
mp_limb_t *sp = new mp_limb_t[SIZE*2]; | |
mp_limb_t *tmp = new mp_limb_t[SIZE*2]; | |
mp_limb_t *b = new mp_limb_t[SIZE]; | |
#endif | |
mpn_sqr(tmp, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{2} | |
mpn_mul_n(tmp, b, x, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{3} | |
mpn_sqr(tmp, b, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{12} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{15} | |
mpn_sqr(tmp, r, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{240} | |
mpn_mul_n(tmp, r, b, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{255} | |
mpn_sqr(tmp, r, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{65280} | |
mpn_sqr(tmp, b, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{130560} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{510} | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- x^{511} | |
mpn_mul_n(tmp, b, r, SIZE); | |
mod(b, tmp, p, tp, sp); // b <- x^{131071} | |
for (size_t i = 0; i < (519-9)/17; i++) { | |
for (size_t t = 0; t < 17; t++) { | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); // r <- | |
} | |
mpn_mul_n(tmp, r, b, SIZE); // r <- r * x^{131071} | |
mod(r, tmp, p, tp, sp); | |
} | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_sqr(tmp, r, SIZE); | |
mod(r, tmp, p, tp, sp); | |
mpn_mul_n(tmp, r, x, SIZE); | |
mod(r, tmp, p, tp, sp); | |
} | |
void invert3(mpz_t &z, const mpz_t &x, const mpz_t &p) { | |
mpz_invert(z, x, p); | |
} | |
void invert4(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *p) { | |
mp_limb_t g[SIZE]; | |
mp_limb_t u[SIZE]; | |
mp_limb_t v[SIZE]; | |
mp_limb_t s[SIZE+1]; | |
mp_size_t sn; | |
mpn_copyi(u, x, SIZE); | |
mpn_copyi(v, p, SIZE); | |
mpn_gcdext(g, s, &sn, u, SIZE, v, SIZE); | |
mpn_copyi(z, (const mp_limb_t*)s, SIZE); | |
if (sn < 0) { | |
mpn_sub_n(z, (const mp_limb_t*)p, (const mp_limb_t*)z, SIZE); | |
} | |
} | |
int main() { | |
mp_limb_t p[SIZE] = { | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, | |
0x1ff | |
}; | |
mp_limb_t a[SIZE] = { | |
0x9164821638291631L, 0x1372917431694363L, 0x9278732747374837L, 0x2913789724789173L, | |
0x6392641693692173L, 0xFFFFFFFFF3213729L, 0xDEADBEEFDEADBEEFL, 0x2913789724789173L, 0x1F}; | |
mp_limb_t b[SIZE] = {0}; | |
#if 0 | |
dump(p); | |
dump(a); | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
dump(b); | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
dump(b); | |
invert4(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
dump(b); | |
return 0; | |
#endif | |
const int N = 10000; | |
time_t begin, end; | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert1(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] mpn_sec_invert: \t%f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
mpz_t z, x, mod; | |
mpz_init(z); | |
set_mpz_t(x, (const uint64_t*)a, SIZE); | |
set_mpz_t(mod, (const uint64_t*)p, SIZE); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert3(z, x, mod); | |
} | |
end = clock(); | |
printf("[+] mpz_invert: \t%f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert2(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert(pow): \t%f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
begin = clock(); | |
for (int i = 0; i < N; i++) { | |
invert4(b, (const mp_limb_t*)a, (const mp_limb_t*)p); | |
} | |
end = clock(); | |
printf("[+] invert(extgcd): \t%f usec\n", double(end-begin)*1e6/CLOCKS_PER_SEC/N); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ g++ pow521.cpp -lgmpxx -lgmp -Ofast && ./a.out | |
""" | |
[+] mpn_sec_invert: 62.267000 usec | |
[+] mpz_invert: 1.634500 usec | |
[+] invert(pow): 51.293900 usec | |
[+] invert(extgcd): 1.523400 usec | |
""" | |
# mpn_gcdextも追加した. mpz_invertよりも速い. | |
# mpn_sec_がつく関数は遅い. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment