-
-
Save polkovnikov/f8e977eed9ccc121a027dbd650e26208 to your computer and use it in GitHub Desktop.
ECM (Elliptic Curve) factorization method, together with Trial Division and Pollard's Rho
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdint> | |
#include <random> | |
#include <stdexcept> | |
#include <type_traits> | |
#include <iomanip> | |
#include <iostream> | |
#include <string> | |
#include <chrono> | |
#include <cmath> | |
#include <sstream> | |
#include <chrono> | |
#include <atomic> | |
#include <set> | |
#include <algorithm> | |
#include <vector> | |
#include <mutex> | |
#include <future> | |
#include <thread> | |
#include <boost/multiprecision/cpp_int.hpp> | |
#define SUPPORT_GMP 1 | |
#if SUPPORT_GMP | |
#include <gmpxx.h> | |
#endif | |
#define ASSERT_MSG(cond, msg) { if (!(cond)) throw std::runtime_error("Assertion (" #cond ") failed at line " + std::to_string(__LINE__) + "! Msg '" + std::string(msg) + "'."); } | |
#define ASSERT(cond) ASSERT_MSG(cond, "") | |
#define COUT(code) { std::unique_lock<std::mutex> lock(cout_mux); std::cout code; std::cout << std::flush; } | |
#define LN { COUT(<< "LN " << __LINE__ << std::endl); } | |
#define DUMP(x) { COUT(<< " " << (#x) << " = " << (x) << std::endl); } | |
#define bisizeof(x) (sizeof(x) * 8) | |
#define BiSize(T) (BiSizeS<T>::value) | |
using u8 = uint8_t; | |
using u32 = uint32_t; | |
using i32 = int32_t; | |
using u64 = uint64_t; | |
using i64 = int64_t; | |
using u128 = boost::multiprecision::uint128_t; | |
using i128 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<128, 128, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<192, 192, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u256 = boost::multiprecision::uint256_t; | |
using i256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<256, 256, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<384, 384, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u512 = boost::multiprecision::uint512_t; | |
using i512 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<512, 512, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u768 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<768, 768, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u1024 = boost::multiprecision::uint1024_t; | |
using i1024 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<1024, 1024, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u1536 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<1536, 1536, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u2048 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<2048, 2048, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using i2048 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<2048, 2048, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u3072 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<3072, 3072, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u4096 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<4096, 4096, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using i4096 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<4096, 4096, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u6144 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<6144, 6144, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u8192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<8192, 8192, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using i8192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<8192, 8192, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u12288 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<12288, 12288, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using u16384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<16384, 16384, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
using i16384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<16384, 16384, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>; | |
using u24576 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<24576, 24576, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>; | |
template <typename T> struct SWordOf; | |
template <> struct SWordOf<u32> { using type = i32; }; | |
template <> struct SWordOf<u64> { using type = i64; }; | |
template <> struct SWordOf<u128> { using type = i128; }; | |
template <> struct SWordOf<u256> { using type = i256; }; | |
template <> struct SWordOf<u512> { using type = i512; }; | |
template <> struct SWordOf<u1024> { using type = i1024; }; | |
template <> struct SWordOf<u2048> { using type = i2048; }; | |
template <> struct SWordOf<u4096> { using type = i4096; }; | |
template <> struct SWordOf<u8192> { using type = i8192; }; | |
template <> struct SWordOf<u16384> { using type = i16384; }; | |
#if SUPPORT_GMP | |
template <> struct SWordOf<mpz_class> { using type = mpz_class; }; | |
#endif | |
template <typename T> struct DWordOf; | |
template <> struct DWordOf<u32> { using type = u64; }; | |
template <> struct DWordOf<u64> { using type = u128; }; | |
template <> struct DWordOf<u128> { using type = u256; }; | |
template <> struct DWordOf<u256> { using type = u512; }; | |
template <> struct DWordOf<u512> { using type = u1024; }; | |
template <> struct DWordOf<u1024> { using type = u2048; }; | |
template <> struct DWordOf<u2048> { using type = u4096; }; | |
template <> struct DWordOf<u4096> { using type = u8192; }; | |
template <> struct DWordOf<u8192> { using type = u16384; }; | |
#if SUPPORT_GMP | |
template <> struct DWordOf<mpz_class> { using type = mpz_class; }; | |
#endif | |
template <typename T> struct TWordOf; | |
template <> struct TWordOf<u64> { using type = u192; }; | |
template <> struct TWordOf<u128> { using type = u384; }; | |
template <> struct TWordOf<u256> { using type = u768; }; | |
template <> struct TWordOf<u512> { using type = u1536; }; | |
template <> struct TWordOf<u1024> { using type = u3072; }; | |
template <> struct TWordOf<u2048> { using type = u6144; }; | |
template <> struct TWordOf<u4096> { using type = u12288; }; | |
template <> struct TWordOf<u8192> { using type = u24576; }; | |
#if SUPPORT_GMP | |
template <> struct TWordOf<mpz_class> { using type = mpz_class; }; | |
#endif | |
template <typename T> struct QWordOf; | |
template <> struct QWordOf<u32> { using type = u128; }; | |
template <> struct QWordOf<u64> { using type = u256; }; | |
template <> struct QWordOf<u128> { using type = u512; }; | |
template <> struct QWordOf<u256> { using type = u1024; }; | |
template <> struct QWordOf<u512> { using type = u2048; }; | |
template <> struct QWordOf<u1024> { using type = u4096; }; | |
template <> struct QWordOf<u2048> { using type = u8192; }; | |
template <> struct QWordOf<u4096> { using type = u16384; }; | |
#if SUPPORT_GMP | |
template <> struct QWordOf<mpz_class> { using type = mpz_class; }; | |
#endif | |
template <typename T> struct BiSizeS; //: std::integral_constant<size_t, bisizeof(T)> {}; | |
template <> struct BiSizeS<u32> : std::integral_constant<size_t, 32> {}; | |
template <> struct BiSizeS<u64> : std::integral_constant<size_t, 64> {}; | |
template <> struct BiSizeS<u128> : std::integral_constant<size_t, 128> {}; | |
template <> struct BiSizeS<u256> : std::integral_constant<size_t, 256> {}; | |
template <> struct BiSizeS<u512> : std::integral_constant<size_t, 512> {}; | |
template <> struct BiSizeS<u1024> : std::integral_constant<size_t, 1024> {}; | |
template <> struct BiSizeS<u2048> : std::integral_constant<size_t, 2048> {}; | |
template <> struct BiSizeS<u4096> : std::integral_constant<size_t, 4096> {}; | |
template <> struct BiSizeS<u8192> : std::integral_constant<size_t, 8192> {}; | |
template <> struct BiSizeS<u16384> : std::integral_constant<size_t, 16384> {}; | |
std::mutex cout_mux; | |
class AtomicMutex { | |
public: | |
auto Locker() { return std::lock_guard<AtomicMutex>(*this); } | |
auto & Flag() { return f_; } | |
void lock() { while (f_.test_and_set(std::memory_order_acquire)) {} } | |
void unlock() { f_.clear(std::memory_order_release); } | |
private: | |
std::atomic_flag f_ = ATOMIC_FLAG_INIT; | |
}; | |
template <typename T> | |
u32 ToU32(T const & x) { | |
#if SUPPORT_GMP | |
if constexpr(std::is_same_v<std::decay_t<T>, mpz_class>) | |
return x.get_ui(); | |
else | |
#endif | |
return u32(x); | |
} | |
template <typename T> | |
double ToDouble(T const & x) { | |
#if SUPPORT_GMP | |
if constexpr(std::is_same_v<std::decay_t<T>, mpz_class>) | |
return x.get_d(); | |
else | |
#endif | |
return double(x); | |
} | |
template <typename T> | |
std::string IntToStr(T n) { | |
if (n == 0) | |
return "0"; | |
std::string r; | |
while (n != 0) { | |
u32 constexpr mod = 1'000'000'000U; | |
std::ostringstream ss; | |
ss << ToU32<T>(n % mod); | |
r = ss.str() + r; | |
n /= mod; | |
} | |
return r; | |
} | |
static auto & RNG() { | |
thread_local std::mt19937_64 rng{123}; //{(u64(std::random_device{}()) << 32) ^ u64(std::random_device{}())}; | |
return rng; | |
} | |
template <typename T> | |
static T RandBits(size_t bisize) { | |
T r = 0; | |
for (size_t i = 0; i < bisize; i += 32) { | |
size_t const cbits = std::min<size_t>(32, bisize - i); | |
r <<= cbits; | |
r ^= u32(RNG()()) >> (32 - cbits); | |
} | |
return r; | |
} | |
template <typename T> | |
static size_t NumBits(T n) { | |
size_t cnt = 0; | |
while (n > 0xFF) { | |
n >>= 8; | |
cnt += 8; | |
} | |
while (n != 0) { | |
n >>= 1; | |
++cnt; | |
} | |
return cnt; | |
} | |
template <typename T> | |
static T RandRange(T end) { | |
if (end <= 1) | |
return 0; | |
size_t const bits = NumBits<T>(end - 1); | |
while (true) { | |
auto const n = RandBits<T>(bits); | |
if (n < end) | |
return n; | |
} | |
} | |
template <typename T> | |
struct UniformIntDistr { | |
UniformIntDistr(T a, T b) : a_(a), diff_(b + 1 - a) { ASSERT(a <= b); } | |
template <typename R> | |
T operator()(R & r) const { return a_ + RandRange<T>(diff_); } | |
T a_ = 0, diff_ = 0; | |
}; | |
template <typename T> | |
static T PowMod(T a, T b, T const & c) { | |
// https://en.wikipedia.org/wiki/Modular_exponentiation | |
using DT = typename DWordOf<T>::type; | |
T r = 1; | |
while (b != 0) { | |
if (ToU32<T>(b & 1)) | |
r = T((DT(r) * a) % c); | |
a = T((DT(a) * a) % c); | |
b >>= 1; | |
} | |
return r; | |
} | |
static double Time() { | |
static auto const gtb = std::chrono::high_resolution_clock::now(); | |
return std::chrono::duration_cast<std::chrono::duration<double>>( | |
std::chrono::high_resolution_clock::now() - gtb).count(); | |
} | |
template <typename Word> | |
class ECPoint { | |
public: | |
using SWord = typename SWordOf<Word>::type; | |
using DWord = typename DWordOf<Word>::type; | |
using TWord = typename TWordOf<Word>::type; | |
static bool constexpr use_barrett = false, is_mpz = std::is_same_v<Word, mpz_class>; | |
class InvError : public std::runtime_error { | |
public: | |
InvError(Word const & _gcd, Word const & _x, Word const & _mod) | |
: std::runtime_error("(gcd " + IntToStr(_gcd) + ", x " + IntToStr(_x) + | |
", mod " + IntToStr(_mod) + ")"), gcd(_gcd), x(_x), mod(_mod) {} | |
Word gcd = 0, x = 0, mod = 0; | |
}; | |
template <typename T> | |
static T Rand() { | |
T r = 0; | |
for (size_t i = 0; i < BiSize(T); i += 64) | |
r ^= T(RNG()()) << i; | |
return r; | |
} | |
template <typename T> | |
static T RandUni(T a, T b) { | |
return UniformIntDistr<T>(a, b)(RNG()); | |
} | |
static Word rand_range(Word const & begin, Word const & end) { | |
ASSERT(begin < end); | |
return UniformIntDistr<Word>(begin, end - 1)(RNG()); | |
} | |
static bool fermat_prp(Word const & n, size_t trials = 32) { | |
// https://en.wikipedia.org/wiki/Fermat_primality_test | |
if (n <= 16) | |
return n == 2 || n == 3 || n == 5 || n == 7 || n == 11 || n == 13; | |
for (size_t i = 0; i < trials; ++i) | |
if (PowMod<Word>(rand_range(2, n - 2), n - 1, n) != 1) | |
return false; | |
return true; | |
} | |
static Word rand_prime_range(Word begin, Word end) { | |
while (true) { | |
Word const p = rand_range(begin, end) | 1; | |
if (fermat_prp(p)) | |
return p; | |
} | |
} | |
static Word rand_prime(size_t bits) { | |
return rand_prime_range(Word(1) << (bits - 1), Word((DWord(1) << bits) - 1)); | |
} | |
std::tuple<Word, size_t> BarrettRS(Word n) { | |
size_t constexpr extra = 3; | |
for (size_t k = 0; k < BiSize(DWord); ++k) { | |
if (2 * (k + extra) < BiSize(Word)) | |
continue; | |
if ((DWord(1) << k) <= DWord(n)) | |
continue; | |
k += extra; | |
ASSERT_MSG(2 * k < BiSize(DWord), "k " + std::to_string(k) + " BiSize(DWord) " + std::to_string(BiSize(DWord))); | |
DWord r = (DWord(1) << (2 * k)) / n; | |
ASSERT_MSG(DWord(r) < (DWord(1) << BiSize(Word)), | |
"k " + std::to_string(k) + " n " + IntToStr(n)); | |
ASSERT(2 * k >= BiSize(Word)); | |
return std::make_tuple(Word(r), size_t(2 * k - BiSize(Word))); | |
} | |
ASSERT(false); | |
} | |
template <bool Adjust> | |
static Word BarrettMod(DWord const & x, Word const & n, Word const & r, size_t s) { | |
if constexpr(!use_barrett) | |
return Word(x % n); | |
else { | |
DWord const q = DWord(((TWord(x) * r) >> BiSize(Word)) >> s); | |
Word t = Word(DWord(x) - q * n); | |
if constexpr(Adjust) { | |
Word const mask = ~Word(SWord(t - n) >> (BiSize(Word) - 1)); | |
t -= mask & n; | |
} | |
return t; | |
} | |
} | |
static Word Adjust(Word const & a, Word const & n) { | |
return a >= n ? a - n : a; | |
} | |
Word modNn(DWord const & a) const { return BarrettMod<false>(a, N_, N_br_, N_bs_); } | |
Word modNa(DWord const & a) const { return BarrettMod<true>(a, N_, N_br_, N_bs_); } | |
Word modQn(DWord const & a) const { return BarrettMod<false>(a, q_, Q_br_, Q_bs_); } | |
Word modQa(DWord const & a) const { return BarrettMod<true>(a, q_, Q_br_, Q_bs_); } | |
static Word mod(DWord const & a, Word const & n) { return Word(a % n); } | |
static ECPoint base_gen(size_t bits = size_t(-1), Word N0 = 0, bool calc_q = false, Word min_order_pfactor = 0) { | |
ASSERT(bits != size_t(-1) || N0 != 0); | |
while (true) { | |
Word N = 0; | |
if (N0 != 0) | |
N = N0; | |
else | |
for (; mod(N, 4) != 3; N = rand_prime(bits)); | |
Word const A = rand_range(1, N); | |
Word x0 = 0, y0 = 0, B = 0; | |
while (true) { | |
x0 = rand_range(1, N); | |
y0 = rand_range(1, N); | |
B = mod(mod(DWord(y0) * y0, N) + N * 2 - mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) - mod(DWord(A) * x0, N), N); | |
if (N0 == 0) { | |
Word const | |
right = mod(mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) + mod(DWord(A) * x0, N) + B, N), | |
y0_calc = PowMod<Word>(right, (N + 1) >> 2, N); | |
if (mod(DWord(y0_calc) * y0_calc, N) != right) | |
continue; | |
ASSERT_MSG(y0 == y0_calc || y0 == N - y0_calc, "x0 " + IntToStr(x0) + " y0 " + IntToStr(y0) + " y0_calc " + | |
IntToStr(y0_calc) + " A " + IntToStr(A) + " B " + IntToStr(B) + " N " + IntToStr(N)); | |
} | |
break; | |
} | |
if (mod(mod(DWord(y0) * y0, N) + 3 * N - mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) - mod(DWord(A) * x0, N) - B, N) != 0) | |
continue; | |
auto const bp = ECPoint(A, B, N, x0, y0, 0, true, calc_q); | |
if (calc_q) { | |
auto BpCheckOrder = [&]{ | |
for (auto e: bp.q_ps()) | |
if (e < min_order_pfactor) | |
return false; | |
return true; | |
}; | |
if (!(bp.q() != 0 && !bp.q_ps().empty() && BpCheckOrder())) | |
continue; | |
ASSERT(bp.q() > 1 && bp * (bp.q() + 1) == bp); | |
} | |
return bp; | |
} | |
ASSERT(false); | |
} | |
ECPoint(Word A, Word B, Word N, Word x, Word y, Word q = 0, bool prepare = true, bool calc_q = false) { | |
if (prepare) { | |
A = mod(A, N); B = mod(B, N); x = mod(x, N); y = mod(y, N); q = mod(q, N); | |
ASSERT(mod(4 * mod(DWord(mod(DWord(A) * A, N)) * A, N) + 27 * mod(DWord(B) * B, N), N) != 0); | |
if constexpr(0) { | |
ASSERT(mod(N, 4) == 3); | |
if (!(x == 0 && y == 0)) { | |
ASSERT(mod(mod(DWord(y) * y, N) + 3 * N - mod(DWord(mod(DWord(x) * x, N)) * x, N) - mod(DWord(A) * x, N) - B, N) == 0); | |
auto root = PowMod<Word>(mod(DWord(mod(DWord(x) * x, N)) * x, N) + mod(DWord(A) * x, N) + B, (N + 1) >> 2, N); | |
ASSERT(y == root || y == N - root); | |
} | |
} | |
if constexpr(use_barrett) { | |
std::tie(N_br_, N_bs_) = BarrettRS(N); | |
if (q != 0) | |
std::tie(Q_br_, Q_bs_) = BarrettRS(q); | |
} | |
} | |
std::tie(A_, B_, N_, x_, y_, q_) = std::tie(A, B, N, x, y, q); | |
if (calc_q) { | |
std::tie(q_, q_ps_) = find_order(); | |
if (q_ != 0) { | |
if constexpr(use_barrett) | |
std::tie(Q_br_, Q_bs_) = BarrettRS(q_); | |
} | |
} | |
} | |
auto copy() const { | |
return ECPoint(A_, B_, N_, x_, y_, q_, false); | |
} | |
auto inf() const { | |
return ECPoint(A_, B_, N_, 0, 0, q_, false); | |
} | |
static auto const & gen_primes(u64 const B) { | |
using T = u64; | |
thread_local std::vector<T> ps = {2, 3}; | |
for (T p = ps.back() + 2; p <= B; p += 2) { | |
bool is_prime = true; | |
for (auto const e: ps) { | |
if (e * e > p) | |
break; | |
if (p % e == 0) { | |
is_prime = false; | |
break; | |
} | |
} | |
if (is_prime) | |
ps.push_back(p); | |
} | |
return ps; | |
} | |
static std::vector<Word> FactorTrialDiv(Word n) { | |
std::vector<Word> fs; | |
Word i = 0, lim = 1000; | |
while (n != 1) { | |
auto const & ps = gen_primes(lim); | |
for (size_t j = i;; ++j) { | |
if (j >= ps.size()) { | |
i = j; | |
lim = ps.back() + 1000; | |
break; | |
} | |
auto const p = ps[j]; | |
while (n % p == 0) { | |
fs.push_back(p); | |
n /= p; | |
} | |
if (n == 1) | |
break; | |
} | |
} | |
return fs; | |
} | |
std::tuple<Word, std::vector<Word>> find_order(Word _m = 1, std::vector<Word> _ps = {}) const { | |
ASSERT(_m <= 2 * N_); | |
bool const first = _ps.empty(); | |
double tim = 0; | |
if (first) | |
tim = Time(); | |
u64 last_prime = 0; | |
if constexpr(1) { | |
auto r = *this; | |
try { | |
r *= _m; | |
} catch (InvError const &) { | |
return std::make_tuple(_m, _ps); | |
} | |
Word const B = 2 * N_; | |
u64 const limit = std::llround(std::pow(ToDouble<Word>(B), 1 / 2.5)); | |
auto const primes = gen_primes(limit + 1); | |
last_prime = primes.back(); | |
for (Word const p: primes) { | |
if (p > limit) | |
break; | |
ASSERT(p <= B); | |
size_t cnt = 0; | |
Word hi = 1; | |
try { | |
for (cnt = 1;; ++cnt) { | |
if (hi * p > B) { | |
cnt -= 1; | |
break; | |
} | |
hi *= p; | |
r *= p; | |
} | |
} catch (InvError const & ex) { | |
_ps.insert(_ps.begin(), cnt, p); | |
auto const res = find_order(hi * _m, _ps); | |
if (first) | |
std::cout << "FindOrderTime " << std::fixed << std::setprecision(3) << (Time() - tim) << ", last_prime " << last_prime << std::endl; | |
return res; | |
} | |
} | |
} else { | |
// Alternative slower way | |
auto r = *this; | |
for (Word i = 0;; ++i) | |
try { | |
r += *this; | |
} catch (InvError const &) { | |
_ps.clear(); | |
return std::make_tuple(i + 2, _ps); | |
} | |
} | |
if (first) | |
std::cout << "FindOrderTime " << std::fixed << std::setprecision(3) << (Time() - tim) << ", last_prime " << last_prime << std::endl; | |
_ps.clear(); | |
return std::make_tuple(Word(0), _ps); | |
} | |
static std::tuple<Word, SWord, SWord> EGCD(Word const & a, Word const & b) { | |
Word ro = 0, r = 0, qu = 0, re = 0; | |
SWord so = 0, s = 0; | |
std::tie(ro, r, so, s) = std::make_tuple(a, b, 1, 0); | |
while (r != 0) { | |
std::tie(qu, re) = std::make_tuple(ro / r, ro % r); | |
std::tie(ro, r) = std::make_tuple(r, re); | |
std::tie(so, s) = std::make_tuple(s, so - s * SWord(qu)); | |
} | |
SWord const to = (SWord(ro) - SWord(a) * so) / SWord(b); | |
return std::make_tuple(ro, so, to); | |
} | |
Word inv(Word a, Word const & n, size_t any_n_q = 0) const { | |
#if SUPPORT_GMP | |
if constexpr(is_mpz) { | |
Word r; | |
if (mpz_invert(r.get_mpz_t(), a.get_mpz_t(), n.get_mpz_t()) == 0) { | |
mpz_gcd(r.get_mpz_t(), a.get_mpz_t(), n.get_mpz_t()); | |
throw InvError(r, a, n); | |
} | |
return r; | |
} | |
#endif | |
ASSERT(n > 0); | |
a = any_n_q == 0 ? mod(a, n) : any_n_q == 1 ? modNa(a) : any_n_q == 2 ? modQa(a) : 0; | |
auto [gcd, s, t] = EGCD(a, n); | |
if (gcd != 1) | |
throw InvError(gcd, a, n); | |
a = Word(SWord(n) + s); | |
a = any_n_q == 0 ? mod(a, n) : any_n_q == 1 ? modNa(a) : any_n_q == 2 ? modQa(a) : 0; | |
return a; | |
} | |
Word invN(Word a) const { return inv(a, N_, 1); } | |
Word invQ(Word a) const { return inv(a, q_, 2); } | |
ECPoint & operator += (ECPoint const & o) { | |
if (x_ == 0 && y_ == 0) { | |
*this = o; | |
return *this; | |
} | |
if (o.x_ == 0 && o.y_ == 0) | |
return *this; | |
Word const Px = x_, Py = y_, Qx = o.x_, Qy = o.y_; | |
Word s = 0; | |
if ((Adjust(Px, N_) == Adjust(Qx, o.N_)) && (Adjust(Py, N_) == Adjust(Qy, o.N_))) | |
s = modNn(DWord(modNn(DWord(Px) * Px * 3) + A_) * invN(Py * 2)); | |
else | |
s = modNn(DWord(Py + 2 * N_ - Qy) * invN(Px + 2 * N_ - Qx)); | |
x_ = modNn(DWord(s) * s + 4 * N_ - Px - Qx); | |
y_ = modNn(DWord(s) * (Px + 2 * N_ - x_) + 2 * N_ - Py); | |
return *this; | |
} | |
ECPoint operator + (ECPoint const & o) const { | |
ECPoint c = *this; | |
c += o; | |
return c; | |
} | |
template <typename TK> | |
ECPoint & operator *= (TK k) { | |
auto const ok = k; | |
ASSERT(k > 0); | |
if (k == 1) | |
return *this; | |
k -= 1; | |
auto r = *this, s = *this; | |
for (size_t icycle = 0;; ++icycle) { | |
if (ToU32<TK>(k & 1)) { | |
r += s; | |
if (k == 1) | |
break; | |
} | |
k >>= 1; | |
s += s; | |
} | |
if constexpr(0) { | |
auto r2 = *this; | |
for (u64 i = 1; i < ok; ++i) | |
r2 += *this; | |
ASSERT(r == r2); | |
} | |
*this = r; | |
return *this; | |
} | |
ECPoint operator * (Word k) const { | |
ECPoint r = *this; | |
r *= k; | |
return r; | |
} | |
bool operator == (ECPoint const & o) const { | |
return A_ == o.A_ && B_ == o.B_ && N_ == o.N_ && q_ == o.q_ && | |
Adjust(x_, N_) == Adjust(o.x_, o.N_) && Adjust(y_, N_) == Adjust(o.y_, o.N_); | |
} | |
Word const & q() const { return q_; } | |
std::vector<Word> const & q_ps() const { return q_ps_; } | |
Word const & x() const { return x_; } | |
std::string ToStr() const { | |
return "X " + IntToStr(x_) + " Y " + IntToStr(y_) + " A " + IntToStr(A_) + " B " + IntToStr(B_) + " N " + IntToStr(N_) + " q " + IntToStr(q_); | |
} | |
private: | |
Word A_ = 0, B_ = 0, N_ = 0, q_ = 0, x_ = 0, y_ = 0, N_br_ = 0, Q_br_ = 0; | |
size_t N_bs_ = 0, Q_bs_ = 0; | |
std::vector<Word> q_ps_; | |
}; | |
template <typename T = u32> | |
T GetPrime(size_t pos) { | |
static std::vector<T> primes = {2, 3}; | |
static AtomicMutex mux; | |
if (!mux.Flag().test() && pos < primes.size()) | |
return primes[pos]; | |
auto lock = mux.Locker(); | |
while (pos >= primes.size()) | |
for (T p = primes.back() + 2;; p += 2) { | |
bool is_prime = true; | |
for (auto d: primes) { | |
if (d * d > p) | |
break; | |
if (p % d == 0) { | |
is_prime = false; | |
break; | |
} | |
} | |
if (is_prime) { | |
primes.push_back(p); | |
break; | |
} | |
} | |
return primes[pos]; | |
} | |
std::pair<double, double> GetPrimeLog2(size_t pos) { | |
static std::vector<std::pair<double, double>> plogs; | |
static AtomicMutex mux; | |
if (!mux.Flag().test() && pos < plogs.size()) | |
return plogs[pos]; | |
auto lock = mux.Locker(); | |
while (pos >= plogs.size()) { | |
auto const log2 = std::log2(GetPrime(pos)); | |
plogs.push_back({log2, plogs.empty() ? log2 : plogs.back().second + log2}); | |
} | |
return plogs[pos]; | |
} | |
template <typename T> | |
std::pair<bool, bool> IsProbablyPrime_TrialDiv(T const n, u64 limit = u64(-1)) { | |
// https://en.wikipedia.org/wiki/Trial_division | |
if (n <= 10) | |
return {n == 2 || n == 3 || n == 5 || n == 7, true}; | |
if ((n & 1) == 0) | |
return {false, true}; | |
u64 d = 0; | |
for (d = 3; d < limit && d * d <= n; d += 2) | |
if (n % d == 0) | |
return {false, true}; | |
return {true, d * d > n}; | |
} | |
template <typename T> | |
bool IsProbablyPrime_Fermat(T const n, size_t ntrials = 32) { | |
// https://en.wikipedia.org/wiki/Fermat_primality_test | |
for (size_t trial = 0; trial < ntrials; ++trial) | |
if (PowMod<T>(RandRange<T>(n - 3) + 2, n - 1, n) != 1) | |
return false; | |
return true; | |
} | |
template <typename T> | |
bool IsProbablyPrime(T const n) { | |
if (n < (1 << 12)) | |
return IsProbablyPrime_TrialDiv(n).first; | |
return IsProbablyPrime_Fermat(n); | |
} | |
template <typename T> | |
std::pair<std::vector<T>, std::vector<T>> Factor_TrialDiv_Odd(T n, u64 limit = u64(-1)) { | |
// https://en.wikipedia.org/wiki/Trial_division | |
thread_local std::vector<T> fs0; | |
auto & fs = fs0; | |
fs.clear(); | |
while ((n & 1) == 0) { | |
n >>= 1; | |
fs.push_back(2); | |
} | |
T d = 0; | |
for (d = 3; d < limit && d * d <= n; d += 2) | |
while (n % d == 0) { | |
n /= d; | |
fs.push_back(d); | |
} | |
if (n == 1) | |
return {fs, {}}; | |
if (d * d > n) { | |
fs.push_back(n); | |
return {fs, {}}; | |
} else | |
return {fs, {n}}; | |
} | |
template <typename To, typename From> | |
To ToT(From x) { | |
To y = 0; | |
for (size_t i = 0; x != 0; ++i) { | |
u32 w = ToU32<From>(x); | |
x >>= 32; | |
if ((i + 1) * 32 <= BiSize(To)) | |
y |= To(w) << (i * 32); | |
} | |
return y; | |
} | |
template <typename T> | |
std::pair<std::vector<T>, std::vector<T>> Factor_TrialDiv_Primes(T n, u64 limit = u64(-1)) { | |
// https://en.wikipedia.org/wiki/Trial_division | |
thread_local std::vector<T> fs0; | |
auto & fs = fs0; | |
fs.clear(); | |
while ((n & 1) == 0) { | |
n >>= 1; | |
fs.push_back(2); | |
} | |
u64 const n_limit = (n >> 64) == 0 ? ToT<u64, T>(n) : (u64(-1) >> 1); | |
u32 p = 0; | |
for (size_t i = 1;; ++i) { | |
p = GetPrime(i); | |
if (p >= limit || u64(p) * p > n_limit) | |
break; | |
while (n % p == 0) { | |
n /= p; | |
fs.push_back(p); | |
} | |
} | |
if (n == 1) | |
return {fs, {}}; | |
if (u64(p) * p > n_limit) { | |
fs.push_back(n); | |
return {fs, {}}; | |
} else | |
return {fs, {n}}; | |
} | |
template <typename T> | |
T GCD(T a, T b) { | |
while (b != 0) { | |
std::swap(a, b); | |
b %= a; | |
} | |
return a; | |
} | |
std::string FloatToStr(double x, int prec = 7) { | |
std::ostringstream ss; | |
ss << std::fixed << std::setprecision(prec) << x; | |
return ss.str(); | |
} | |
template <typename T> | |
std::pair<std::vector<T>, std::vector<T>> Factor_PollardRho(T n, u64 limit = u64(-1), size_t ntrials = 6) { | |
// https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm | |
using DT = typename DWordOf<T>::type; | |
if (n <= 1) | |
return {{}, {}}; | |
if (IsProbablyPrime<T>(n)) | |
return {{n}, {}}; | |
auto f = [&n](T x){ return T((DT(x) * x + 1) % n); }; | |
ASSERT(3 <= n); | |
size_t cnt = 0; | |
UniformIntDistr<T> distr(1, n - 2); | |
for (size_t itry = 0; itry < ntrials; ++itry) { | |
bool failed = false; | |
T x = distr(RNG()); | |
u64 sum_cycles = 0; | |
for (u64 cycle = 1;; cycle <<= 1) { | |
T y = x, m = 1, xstart = x; | |
for (u64 i = 0, istart = 0; i < cycle; ++i) { | |
x = f(x); | |
m = T((DT(m) * (n + x - y)) % n); | |
++cnt; | |
if (cnt >= limit) | |
return {{}, {n}}; | |
if (!(i + 1 - istart >= (1 << 7) || i + 1 >= cycle)) | |
continue; | |
if (GCD<T>(n, m) == 1) { | |
istart = i + 1; | |
xstart = x; | |
continue; | |
} | |
T x2 = xstart; | |
for (u64 i2 = istart; i2 <= i; ++i2) { | |
x2 = f(x2); | |
auto const g = GCD<T>(n, n + x2 - y); | |
if (g == 1) | |
continue; | |
sum_cycles += i + 1; | |
{ | |
auto const threshold = sum_cycles >= 100 ? 0.5 : 0.6; | |
ASSERT_MSG(std::log2(sum_cycles) < std::log2(ToDouble<T>(n)) * threshold, "n " + IntToStr(n) + | |
" (log2 " + std::to_string(std::log2(ToDouble<T>(n))) + | |
") cycles " + IntToStr(sum_cycles) + " (log2 " + std::to_string(std::log2(sum_cycles)) + ")"); | |
} | |
if (g == n) { | |
failed = true; | |
break; | |
} | |
auto res0 = Factor_PollardRho<T>(g, limit, ntrials); | |
auto res1 = Factor_PollardRho<T>(n / g, limit, ntrials); | |
res0.first.insert(res0.first.end(), res1.first.begin(), res1.first.end()); | |
res0.second.insert(res0.second.end(), res1.second.begin(), res1.second.end()); | |
return std::move(res0); | |
} | |
if (failed) | |
break; | |
ASSERT(false); | |
} | |
sum_cycles += cycle; | |
if (failed) | |
break; | |
} | |
} | |
return {{}, {n}}; | |
} | |
template <typename T> | |
std::pair<std::vector<T>, std::vector<T>> Factor_ECM(T const n, u64 limit = u64(-1), bool verbose = false) { | |
if (n <= 1) | |
return {{}, {}}; | |
if (IsProbablyPrime(n)) | |
return {{n}, {}}; | |
using ECT = ECPoint<T>; | |
struct Entry { | |
ECT curve; | |
size_t done_primes = 0; | |
bool active = true; | |
}; | |
size_t const bits = NumBits(n), nthr = std::thread::hardware_concurrency(); | |
std::vector<Entry> curves; | |
size_t T_bisize = 0; | |
#if SUPPORT_GMP | |
if constexpr(std::is_same_v<T, mpz_class>) | |
T_bisize = (bits + 63) / 64 * 64; | |
else | |
#endif | |
T_bisize = BiSize(T); | |
T n_factored = n; | |
bool n_factored_changed = true; | |
std::set<T> factors; | |
std::mutex mux; | |
std::atomic<double> done_ops = 0; | |
std::vector<std::future<void>> asyncs; | |
size_t num_primes = 0; | |
double tim = Time(); | |
for (size_t iiter = 0;; ++iiter) { | |
if (n_factored_changed && IsProbablyPrime(n_factored)) { | |
factors.insert(n_factored); | |
n_factored = 1; | |
} | |
if (n_factored == 1) | |
break; | |
tim = Time(); | |
size_t const extra_primes = iiter == 0 ? 1 << 9 : [&]{ | |
auto Prob = [&](size_t nprimes){ | |
double x = double(bits) / GetPrimeLog2(nprimes).first; | |
return std::pow(x, -x); | |
}; | |
auto Prob2 = [&](size_t ncurves, size_t nprimes){ | |
return 1.0 - std::pow(1.0 - Prob(nprimes), ncurves); | |
}; | |
auto Prob2G = [&](size_t ncurves, size_t nprimes){ | |
#if SUPPORT_GMP | |
double x = double(bits) / GetPrimeLog2(nprimes).first; | |
mpf_class f; | |
f.set_prec(1 << 15); | |
f = x; | |
for (size_t i = 0; i < 4; ++i) | |
mpf_sqrt(f.get_mpf_t(), f.get_mpf_t()); | |
mpf_pow_ui(f.get_mpf_t(), f.get_mpf_t(), u32(std::llround(x * 16.0))); | |
mpf_ui_div(f.get_mpf_t(), 1, f.get_mpf_t()); | |
mpf_ui_sub(f.get_mpf_t(), 1, f.get_mpf_t()); | |
mpf_pow_ui(f.get_mpf_t(), f.get_mpf_t(), u32(ncurves)); | |
mpf_ui_sub(f.get_mpf_t(), 1, f.get_mpf_t()); | |
return f; | |
#else | |
return Prob2(ncurves, nprimes); | |
#endif | |
}; | |
auto Work = [&](size_t ncurves, size_t nprimes){ | |
return GetPrimeLog2(nprimes).second * ncurves; | |
}; | |
size_t maxi = 0, i = 0; | |
double w0 = Work(curves.size(), num_primes); | |
auto p0 = Prob2G(curves.size(), num_primes); | |
using P2GT = std::decay_t<decltype(p0)>; | |
P2GT maxv = 0; | |
for (i = 8; i <= (1 << 9); i += 8) { | |
size_t constexpr dc = 1; | |
double w1 = Work(curves.size() + dc, num_primes + i), wd = w1 - w0; | |
P2GT p1 = Prob2G(curves.size() + dc, num_primes + i), pd = p1 - p0; | |
/* | |
ASSERT_MSG(w0 < w1 && p0 <= p1, | |
"w0 " + FloatToStr(w0, 1) + " w1 " + FloatToStr(w1, 1) + | |
" p0 " + FloatToStr(p0, 6) + " p1 " + FloatToStr(p1, 6) + " (" + | |
std::to_string(p1) + ") i " + std::to_string(i)); | |
*/ | |
P2GT k = pd / wd; | |
ASSERT(k >= 0); | |
if (k >= maxv) { | |
maxv = k; | |
maxi = i; | |
} | |
if (maxi != 0 && maxi + 20 < i/* && maxv / k > 1.05*/) | |
break; | |
} | |
//ASSERT(i <= (1 << 12)); | |
return maxi == 0 ? 1 << 5 : maxi; | |
}(); | |
size_t const B_bits = std::min<size_t>(T_bisize - 1, 32), tot_bisize = T_bisize * 4, | |
task_bits = std::min<size_t>(tot_bisize - 8, 1 << 9); | |
ASSERT(B_bits < T_bisize); | |
for (size_t i = 0; i < (iiter == 0 ? 1 : 1); ++i) | |
curves.push_back(Entry{.curve = ECT::base_gen(size_t(-1), n)}); | |
num_primes += extra_primes; | |
for (size_t ithr = 0; ithr < nthr; ++ithr) | |
asyncs.push_back(std::async(std::launch::async, [&, ithr]{ | |
size_t const block = (curves.size() + nthr - 1) / nthr, begin = ithr * block, | |
end = std::min<size_t>((ithr + 1) * block, curves.size()); | |
for (size_t icurve = begin; icurve < end; ++icurve) { | |
auto & curve = curves.at(icurve); | |
if (!curve.active) | |
continue; | |
while (curve.done_primes < num_primes) { | |
typename QWordOf<T>::type tot = 1; | |
double tot_lsum = 0; | |
size_t i = 0; | |
for (i = curve.done_primes; i < num_primes; ++i) { | |
auto const p = GetPrime<u64>(i); | |
auto const plog = GetPrimeLog2(i).first; | |
T mp = p; | |
double lsum = plog; | |
while (true) { | |
if (lsum + plog > B_bits) | |
break; | |
mp *= p; | |
lsum += plog; | |
} | |
tot_lsum += lsum; | |
tot *= mp; | |
if (tot_lsum + B_bits > task_bits) { | |
++i; | |
break; | |
} | |
} | |
curve.done_primes = i; | |
try { | |
done_ops += tot_lsum; | |
curve.curve *= tot; | |
} catch (typename ECT::InvError const & ex) { | |
ASSERT(ex.gcd > 1 && n % ex.gcd == 0); | |
if (ex.gcd == n) | |
continue; | |
else { | |
std::lock_guard<std::mutex> lock(mux); | |
factors.insert(ex.gcd); | |
while (n_factored % ex.gcd == 0) | |
n_factored /= ex.gcd; | |
n_factored_changed = true; | |
} | |
curve.active = false; | |
break; | |
} | |
} | |
} | |
})); | |
for (auto & e: asyncs) | |
e.get(); | |
asyncs.clear(); | |
if (verbose) | |
std::cout << "Curves " << std::setw(3) << curves.size() /*<< ", Curve " << curves.back().curve.ToStr()*/ | |
<< ", Ops " << std::fixed << std::setprecision(3) << std::setw(8) << double(done_ops) / 1024 | |
<< " Ki, ExtraPrimes " << std::setw(3) << extra_primes << ", Primes " << std::setw(6) << double(num_primes) / 1024 | |
<< " Ki, IterTime " << std::setw(7) << (Time() - tim) << " sec" << std::endl; | |
if (done_ops >= limit) | |
break; | |
} | |
std::pair<std::vector<T>, std::vector<T>> res; | |
T nc = n; | |
for (auto f: factors) { | |
ASSERT(nc % f == 0); | |
while (nc % f == 0) { | |
res.first.push_back(f); | |
nc /= f; | |
} | |
} | |
if (nc != 1) | |
res.second.push_back(nc); | |
return res; | |
} | |
template <typename T> | |
void OutputFactors(T n, std::pair<std::vector<T>, std::vector<T>> const & fs, bool check = true) { | |
auto Sorted = [](auto v){ | |
std::sort(v.begin(), v.end()); | |
return v; | |
}; | |
auto Out = [&](auto const & x){ | |
for (auto f: Sorted(x)) | |
std::cout << IntToStr(f) << " (2^" << std::fixed << std::setprecision(2) | |
<< std::log2(ToDouble<T>(f)) << (IsProbablyPrime<T>(f) ? ", prime)" : ", composite)") << ", "; | |
}; | |
if (n != 0) | |
std::cout << "Num: " << IntToStr(n) << " (2^" << std::fixed << std::setprecision(2) << std::log2(ToDouble<T>(n)) << ")"; | |
std::cout << (fs.first.empty() ? "" : "\nFactored: "); Out(fs.first); | |
std::cout << (fs.second.empty() ? "" : "\nUnFactored: "); Out(fs.second); std::cout << std::endl; | |
if (check) { | |
for (auto const & e0: {fs.first, fs.second}) | |
for (auto const & e1: e0) { | |
ASSERT(n % e1 == 0); | |
n /= e1; | |
} | |
ASSERT(n == 1); | |
} | |
} | |
template <typename T> | |
std::pair<std::vector<T>, std::vector<T>> Factor(T const n, size_t sec512 = 3, bool verbose = false) { | |
using ResT = std::pair<std::vector<T>, std::vector<T>>; | |
double tt = Time(), t0 = 0, t1 = 0, t2 = 0; | |
ResT r0{}, r1{}, r2{}; | |
t0 = Time(); | |
r0 = Factor_TrialDiv_Primes<T>(n, std::llround((1 << 22) * sec512 * 0.1)); | |
t0 = Time() - t0; | |
if (verbose) { | |
std::cout << "TrialDiv time " << std::fixed << std::setprecision(3) << t0 << " sec" << std::endl; | |
OutputFactors<T>(n, r0, false); | |
} | |
if (!r0.second.empty()) { | |
t1 = Time(); | |
r1 = Factor_PollardRho<T>(r0.second.front(), std::llround((1 << 17) * sec512 * 0.2)); | |
t1 = Time() - t1; | |
if (verbose) { | |
std::cout << "PollardRho time " << t1 << " sec" << std::endl; | |
OutputFactors<T>(r0.second.front(), r1, false); | |
} | |
r0.first.insert(r0.first.end(), r1.first.begin(), r1.first.end()); | |
} | |
r0.second.clear(); | |
for (auto e: r1.second) { | |
t2 = Time(); | |
r2 = Factor_ECM<T>(e, std::llround((1 << 12) * sec512 * 0.7), true); | |
t2 = Time() - t2; | |
if (verbose) { | |
std::cout << "ECM time " << t2 << " sec" << std::endl; | |
OutputFactors<T>(e, r2, false); | |
} | |
r0.first.insert(r0.first.end(), r2.first.begin(), r2.first.end()); | |
r0.second.insert(r0.second.end(), r2.second.begin(), r2.second.end()); | |
} | |
tt = Time() - tt; | |
if (verbose) { | |
std::cout << "Final time " << std::fixed << std::setprecision(3) << tt << " sec" << std::endl; | |
OutputFactors<T>(n, r0, true); | |
} | |
return r0; | |
} | |
template <typename T> | |
static T RandPrimeRange(T begin, T end) { | |
ASSERT(begin < end); | |
while (true) { | |
T const p = (RandRange<T>(end - begin) + begin) | 1; | |
if (IsProbablyPrime<T>(p)) | |
return p; | |
} | |
} | |
template <typename T> | |
static T RandPrime(size_t bits) { | |
using DT = typename DWordOf<T>::type; | |
return RandPrimeRange<T>(T(1) << (bits - 1), T((DT(1) << bits) - 1)); | |
} | |
int main() { | |
try { | |
using T = mpz_class; | |
for (size_t i = 0; i < 8; ++i) | |
Factor<T>(RandBits<T>(2048), 60, true); | |
return 0; | |
T n = 1; | |
for (size_t bits: {4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72}) | |
n *= RandPrime<T>(bits); | |
Factor<T>(n, 20, true); | |
return 0; | |
} catch (std::exception const & ex) { | |
std::cout << "Exception: " << ex.what() << std::endl; | |
return -1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment