Skip to content

Instantly share code, notes, and snippets.

@polkovnikov
Created October 1, 2022 20:05
Show Gist options
  • Save polkovnikov/f8e977eed9ccc121a027dbd650e26208 to your computer and use it in GitHub Desktop.
Save polkovnikov/f8e977eed9ccc121a027dbd650e26208 to your computer and use it in GitHub Desktop.
ECM (Elliptic Curve) factorization method, together with Trial Division and Pollard's Rho
#include <cstdint>
#include <random>
#include <stdexcept>
#include <type_traits>
#include <iomanip>
#include <iostream>
#include <string>
#include <chrono>
#include <cmath>
#include <sstream>
#include <chrono>
#include <atomic>
#include <set>
#include <algorithm>
#include <vector>
#include <mutex>
#include <future>
#include <thread>
#include <boost/multiprecision/cpp_int.hpp>
#define SUPPORT_GMP 1
#if SUPPORT_GMP
#include <gmpxx.h>
#endif
#define ASSERT_MSG(cond, msg) { if (!(cond)) throw std::runtime_error("Assertion (" #cond ") failed at line " + std::to_string(__LINE__) + "! Msg '" + std::string(msg) + "'."); }
#define ASSERT(cond) ASSERT_MSG(cond, "")
#define COUT(code) { std::unique_lock<std::mutex> lock(cout_mux); std::cout code; std::cout << std::flush; }
#define LN { COUT(<< "LN " << __LINE__ << std::endl); }
#define DUMP(x) { COUT(<< " " << (#x) << " = " << (x) << std::endl); }
#define bisizeof(x) (sizeof(x) * 8)
#define BiSize(T) (BiSizeS<T>::value)
using u8 = uint8_t;
using u32 = uint32_t;
using i32 = int32_t;
using u64 = uint64_t;
using i64 = int64_t;
using u128 = boost::multiprecision::uint128_t;
using i128 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<128, 128, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<192, 192, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u256 = boost::multiprecision::uint256_t;
using i256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<256, 256, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<384, 384, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u512 = boost::multiprecision::uint512_t;
using i512 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<512, 512, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u768 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<768, 768, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u1024 = boost::multiprecision::uint1024_t;
using i1024 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<1024, 1024, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u1536 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<1536, 1536, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u2048 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<2048, 2048, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using i2048 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<2048, 2048, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u3072 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<3072, 3072, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u4096 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<4096, 4096, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using i4096 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<4096, 4096, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u6144 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<6144, 6144, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u8192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<8192, 8192, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using i8192 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<8192, 8192, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u12288 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<12288, 12288, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using u16384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<16384, 16384, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
using i16384 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<16384, 16384, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void>>;
using u24576 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<24576, 24576, boost::multiprecision::unsigned_magnitude, boost::multiprecision::unchecked, void>>;
template <typename T> struct SWordOf;
template <> struct SWordOf<u32> { using type = i32; };
template <> struct SWordOf<u64> { using type = i64; };
template <> struct SWordOf<u128> { using type = i128; };
template <> struct SWordOf<u256> { using type = i256; };
template <> struct SWordOf<u512> { using type = i512; };
template <> struct SWordOf<u1024> { using type = i1024; };
template <> struct SWordOf<u2048> { using type = i2048; };
template <> struct SWordOf<u4096> { using type = i4096; };
template <> struct SWordOf<u8192> { using type = i8192; };
template <> struct SWordOf<u16384> { using type = i16384; };
#if SUPPORT_GMP
template <> struct SWordOf<mpz_class> { using type = mpz_class; };
#endif
template <typename T> struct DWordOf;
template <> struct DWordOf<u32> { using type = u64; };
template <> struct DWordOf<u64> { using type = u128; };
template <> struct DWordOf<u128> { using type = u256; };
template <> struct DWordOf<u256> { using type = u512; };
template <> struct DWordOf<u512> { using type = u1024; };
template <> struct DWordOf<u1024> { using type = u2048; };
template <> struct DWordOf<u2048> { using type = u4096; };
template <> struct DWordOf<u4096> { using type = u8192; };
template <> struct DWordOf<u8192> { using type = u16384; };
#if SUPPORT_GMP
template <> struct DWordOf<mpz_class> { using type = mpz_class; };
#endif
template <typename T> struct TWordOf;
template <> struct TWordOf<u64> { using type = u192; };
template <> struct TWordOf<u128> { using type = u384; };
template <> struct TWordOf<u256> { using type = u768; };
template <> struct TWordOf<u512> { using type = u1536; };
template <> struct TWordOf<u1024> { using type = u3072; };
template <> struct TWordOf<u2048> { using type = u6144; };
template <> struct TWordOf<u4096> { using type = u12288; };
template <> struct TWordOf<u8192> { using type = u24576; };
#if SUPPORT_GMP
template <> struct TWordOf<mpz_class> { using type = mpz_class; };
#endif
template <typename T> struct QWordOf;
template <> struct QWordOf<u32> { using type = u128; };
template <> struct QWordOf<u64> { using type = u256; };
template <> struct QWordOf<u128> { using type = u512; };
template <> struct QWordOf<u256> { using type = u1024; };
template <> struct QWordOf<u512> { using type = u2048; };
template <> struct QWordOf<u1024> { using type = u4096; };
template <> struct QWordOf<u2048> { using type = u8192; };
template <> struct QWordOf<u4096> { using type = u16384; };
#if SUPPORT_GMP
template <> struct QWordOf<mpz_class> { using type = mpz_class; };
#endif
template <typename T> struct BiSizeS; //: std::integral_constant<size_t, bisizeof(T)> {};
template <> struct BiSizeS<u32> : std::integral_constant<size_t, 32> {};
template <> struct BiSizeS<u64> : std::integral_constant<size_t, 64> {};
template <> struct BiSizeS<u128> : std::integral_constant<size_t, 128> {};
template <> struct BiSizeS<u256> : std::integral_constant<size_t, 256> {};
template <> struct BiSizeS<u512> : std::integral_constant<size_t, 512> {};
template <> struct BiSizeS<u1024> : std::integral_constant<size_t, 1024> {};
template <> struct BiSizeS<u2048> : std::integral_constant<size_t, 2048> {};
template <> struct BiSizeS<u4096> : std::integral_constant<size_t, 4096> {};
template <> struct BiSizeS<u8192> : std::integral_constant<size_t, 8192> {};
template <> struct BiSizeS<u16384> : std::integral_constant<size_t, 16384> {};
std::mutex cout_mux;
class AtomicMutex {
public:
auto Locker() { return std::lock_guard<AtomicMutex>(*this); }
auto & Flag() { return f_; }
void lock() { while (f_.test_and_set(std::memory_order_acquire)) {} }
void unlock() { f_.clear(std::memory_order_release); }
private:
std::atomic_flag f_ = ATOMIC_FLAG_INIT;
};
template <typename T>
u32 ToU32(T const & x) {
#if SUPPORT_GMP
if constexpr(std::is_same_v<std::decay_t<T>, mpz_class>)
return x.get_ui();
else
#endif
return u32(x);
}
template <typename T>
double ToDouble(T const & x) {
#if SUPPORT_GMP
if constexpr(std::is_same_v<std::decay_t<T>, mpz_class>)
return x.get_d();
else
#endif
return double(x);
}
template <typename T>
std::string IntToStr(T n) {
if (n == 0)
return "0";
std::string r;
while (n != 0) {
u32 constexpr mod = 1'000'000'000U;
std::ostringstream ss;
ss << ToU32<T>(n % mod);
r = ss.str() + r;
n /= mod;
}
return r;
}
static auto & RNG() {
thread_local std::mt19937_64 rng{123}; //{(u64(std::random_device{}()) << 32) ^ u64(std::random_device{}())};
return rng;
}
template <typename T>
static T RandBits(size_t bisize) {
T r = 0;
for (size_t i = 0; i < bisize; i += 32) {
size_t const cbits = std::min<size_t>(32, bisize - i);
r <<= cbits;
r ^= u32(RNG()()) >> (32 - cbits);
}
return r;
}
template <typename T>
static size_t NumBits(T n) {
size_t cnt = 0;
while (n > 0xFF) {
n >>= 8;
cnt += 8;
}
while (n != 0) {
n >>= 1;
++cnt;
}
return cnt;
}
template <typename T>
static T RandRange(T end) {
if (end <= 1)
return 0;
size_t const bits = NumBits<T>(end - 1);
while (true) {
auto const n = RandBits<T>(bits);
if (n < end)
return n;
}
}
template <typename T>
struct UniformIntDistr {
UniformIntDistr(T a, T b) : a_(a), diff_(b + 1 - a) { ASSERT(a <= b); }
template <typename R>
T operator()(R & r) const { return a_ + RandRange<T>(diff_); }
T a_ = 0, diff_ = 0;
};
template <typename T>
static T PowMod(T a, T b, T const & c) {
// https://en.wikipedia.org/wiki/Modular_exponentiation
using DT = typename DWordOf<T>::type;
T r = 1;
while (b != 0) {
if (ToU32<T>(b & 1))
r = T((DT(r) * a) % c);
a = T((DT(a) * a) % c);
b >>= 1;
}
return r;
}
static double Time() {
static auto const gtb = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::duration<double>>(
std::chrono::high_resolution_clock::now() - gtb).count();
}
template <typename Word>
class ECPoint {
public:
using SWord = typename SWordOf<Word>::type;
using DWord = typename DWordOf<Word>::type;
using TWord = typename TWordOf<Word>::type;
static bool constexpr use_barrett = false, is_mpz = std::is_same_v<Word, mpz_class>;
class InvError : public std::runtime_error {
public:
InvError(Word const & _gcd, Word const & _x, Word const & _mod)
: std::runtime_error("(gcd " + IntToStr(_gcd) + ", x " + IntToStr(_x) +
", mod " + IntToStr(_mod) + ")"), gcd(_gcd), x(_x), mod(_mod) {}
Word gcd = 0, x = 0, mod = 0;
};
template <typename T>
static T Rand() {
T r = 0;
for (size_t i = 0; i < BiSize(T); i += 64)
r ^= T(RNG()()) << i;
return r;
}
template <typename T>
static T RandUni(T a, T b) {
return UniformIntDistr<T>(a, b)(RNG());
}
static Word rand_range(Word const & begin, Word const & end) {
ASSERT(begin < end);
return UniformIntDistr<Word>(begin, end - 1)(RNG());
}
static bool fermat_prp(Word const & n, size_t trials = 32) {
// https://en.wikipedia.org/wiki/Fermat_primality_test
if (n <= 16)
return n == 2 || n == 3 || n == 5 || n == 7 || n == 11 || n == 13;
for (size_t i = 0; i < trials; ++i)
if (PowMod<Word>(rand_range(2, n - 2), n - 1, n) != 1)
return false;
return true;
}
static Word rand_prime_range(Word begin, Word end) {
while (true) {
Word const p = rand_range(begin, end) | 1;
if (fermat_prp(p))
return p;
}
}
static Word rand_prime(size_t bits) {
return rand_prime_range(Word(1) << (bits - 1), Word((DWord(1) << bits) - 1));
}
std::tuple<Word, size_t> BarrettRS(Word n) {
size_t constexpr extra = 3;
for (size_t k = 0; k < BiSize(DWord); ++k) {
if (2 * (k + extra) < BiSize(Word))
continue;
if ((DWord(1) << k) <= DWord(n))
continue;
k += extra;
ASSERT_MSG(2 * k < BiSize(DWord), "k " + std::to_string(k) + " BiSize(DWord) " + std::to_string(BiSize(DWord)));
DWord r = (DWord(1) << (2 * k)) / n;
ASSERT_MSG(DWord(r) < (DWord(1) << BiSize(Word)),
"k " + std::to_string(k) + " n " + IntToStr(n));
ASSERT(2 * k >= BiSize(Word));
return std::make_tuple(Word(r), size_t(2 * k - BiSize(Word)));
}
ASSERT(false);
}
template <bool Adjust>
static Word BarrettMod(DWord const & x, Word const & n, Word const & r, size_t s) {
if constexpr(!use_barrett)
return Word(x % n);
else {
DWord const q = DWord(((TWord(x) * r) >> BiSize(Word)) >> s);
Word t = Word(DWord(x) - q * n);
if constexpr(Adjust) {
Word const mask = ~Word(SWord(t - n) >> (BiSize(Word) - 1));
t -= mask & n;
}
return t;
}
}
static Word Adjust(Word const & a, Word const & n) {
return a >= n ? a - n : a;
}
Word modNn(DWord const & a) const { return BarrettMod<false>(a, N_, N_br_, N_bs_); }
Word modNa(DWord const & a) const { return BarrettMod<true>(a, N_, N_br_, N_bs_); }
Word modQn(DWord const & a) const { return BarrettMod<false>(a, q_, Q_br_, Q_bs_); }
Word modQa(DWord const & a) const { return BarrettMod<true>(a, q_, Q_br_, Q_bs_); }
static Word mod(DWord const & a, Word const & n) { return Word(a % n); }
static ECPoint base_gen(size_t bits = size_t(-1), Word N0 = 0, bool calc_q = false, Word min_order_pfactor = 0) {
ASSERT(bits != size_t(-1) || N0 != 0);
while (true) {
Word N = 0;
if (N0 != 0)
N = N0;
else
for (; mod(N, 4) != 3; N = rand_prime(bits));
Word const A = rand_range(1, N);
Word x0 = 0, y0 = 0, B = 0;
while (true) {
x0 = rand_range(1, N);
y0 = rand_range(1, N);
B = mod(mod(DWord(y0) * y0, N) + N * 2 - mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) - mod(DWord(A) * x0, N), N);
if (N0 == 0) {
Word const
right = mod(mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) + mod(DWord(A) * x0, N) + B, N),
y0_calc = PowMod<Word>(right, (N + 1) >> 2, N);
if (mod(DWord(y0_calc) * y0_calc, N) != right)
continue;
ASSERT_MSG(y0 == y0_calc || y0 == N - y0_calc, "x0 " + IntToStr(x0) + " y0 " + IntToStr(y0) + " y0_calc " +
IntToStr(y0_calc) + " A " + IntToStr(A) + " B " + IntToStr(B) + " N " + IntToStr(N));
}
break;
}
if (mod(mod(DWord(y0) * y0, N) + 3 * N - mod(DWord(mod(DWord(x0) * x0, N)) * x0, N) - mod(DWord(A) * x0, N) - B, N) != 0)
continue;
auto const bp = ECPoint(A, B, N, x0, y0, 0, true, calc_q);
if (calc_q) {
auto BpCheckOrder = [&]{
for (auto e: bp.q_ps())
if (e < min_order_pfactor)
return false;
return true;
};
if (!(bp.q() != 0 && !bp.q_ps().empty() && BpCheckOrder()))
continue;
ASSERT(bp.q() > 1 && bp * (bp.q() + 1) == bp);
}
return bp;
}
ASSERT(false);
}
ECPoint(Word A, Word B, Word N, Word x, Word y, Word q = 0, bool prepare = true, bool calc_q = false) {
if (prepare) {
A = mod(A, N); B = mod(B, N); x = mod(x, N); y = mod(y, N); q = mod(q, N);
ASSERT(mod(4 * mod(DWord(mod(DWord(A) * A, N)) * A, N) + 27 * mod(DWord(B) * B, N), N) != 0);
if constexpr(0) {
ASSERT(mod(N, 4) == 3);
if (!(x == 0 && y == 0)) {
ASSERT(mod(mod(DWord(y) * y, N) + 3 * N - mod(DWord(mod(DWord(x) * x, N)) * x, N) - mod(DWord(A) * x, N) - B, N) == 0);
auto root = PowMod<Word>(mod(DWord(mod(DWord(x) * x, N)) * x, N) + mod(DWord(A) * x, N) + B, (N + 1) >> 2, N);
ASSERT(y == root || y == N - root);
}
}
if constexpr(use_barrett) {
std::tie(N_br_, N_bs_) = BarrettRS(N);
if (q != 0)
std::tie(Q_br_, Q_bs_) = BarrettRS(q);
}
}
std::tie(A_, B_, N_, x_, y_, q_) = std::tie(A, B, N, x, y, q);
if (calc_q) {
std::tie(q_, q_ps_) = find_order();
if (q_ != 0) {
if constexpr(use_barrett)
std::tie(Q_br_, Q_bs_) = BarrettRS(q_);
}
}
}
auto copy() const {
return ECPoint(A_, B_, N_, x_, y_, q_, false);
}
auto inf() const {
return ECPoint(A_, B_, N_, 0, 0, q_, false);
}
static auto const & gen_primes(u64 const B) {
using T = u64;
thread_local std::vector<T> ps = {2, 3};
for (T p = ps.back() + 2; p <= B; p += 2) {
bool is_prime = true;
for (auto const e: ps) {
if (e * e > p)
break;
if (p % e == 0) {
is_prime = false;
break;
}
}
if (is_prime)
ps.push_back(p);
}
return ps;
}
static std::vector<Word> FactorTrialDiv(Word n) {
std::vector<Word> fs;
Word i = 0, lim = 1000;
while (n != 1) {
auto const & ps = gen_primes(lim);
for (size_t j = i;; ++j) {
if (j >= ps.size()) {
i = j;
lim = ps.back() + 1000;
break;
}
auto const p = ps[j];
while (n % p == 0) {
fs.push_back(p);
n /= p;
}
if (n == 1)
break;
}
}
return fs;
}
std::tuple<Word, std::vector<Word>> find_order(Word _m = 1, std::vector<Word> _ps = {}) const {
ASSERT(_m <= 2 * N_);
bool const first = _ps.empty();
double tim = 0;
if (first)
tim = Time();
u64 last_prime = 0;
if constexpr(1) {
auto r = *this;
try {
r *= _m;
} catch (InvError const &) {
return std::make_tuple(_m, _ps);
}
Word const B = 2 * N_;
u64 const limit = std::llround(std::pow(ToDouble<Word>(B), 1 / 2.5));
auto const primes = gen_primes(limit + 1);
last_prime = primes.back();
for (Word const p: primes) {
if (p > limit)
break;
ASSERT(p <= B);
size_t cnt = 0;
Word hi = 1;
try {
for (cnt = 1;; ++cnt) {
if (hi * p > B) {
cnt -= 1;
break;
}
hi *= p;
r *= p;
}
} catch (InvError const & ex) {
_ps.insert(_ps.begin(), cnt, p);
auto const res = find_order(hi * _m, _ps);
if (first)
std::cout << "FindOrderTime " << std::fixed << std::setprecision(3) << (Time() - tim) << ", last_prime " << last_prime << std::endl;
return res;
}
}
} else {
// Alternative slower way
auto r = *this;
for (Word i = 0;; ++i)
try {
r += *this;
} catch (InvError const &) {
_ps.clear();
return std::make_tuple(i + 2, _ps);
}
}
if (first)
std::cout << "FindOrderTime " << std::fixed << std::setprecision(3) << (Time() - tim) << ", last_prime " << last_prime << std::endl;
_ps.clear();
return std::make_tuple(Word(0), _ps);
}
static std::tuple<Word, SWord, SWord> EGCD(Word const & a, Word const & b) {
Word ro = 0, r = 0, qu = 0, re = 0;
SWord so = 0, s = 0;
std::tie(ro, r, so, s) = std::make_tuple(a, b, 1, 0);
while (r != 0) {
std::tie(qu, re) = std::make_tuple(ro / r, ro % r);
std::tie(ro, r) = std::make_tuple(r, re);
std::tie(so, s) = std::make_tuple(s, so - s * SWord(qu));
}
SWord const to = (SWord(ro) - SWord(a) * so) / SWord(b);
return std::make_tuple(ro, so, to);
}
Word inv(Word a, Word const & n, size_t any_n_q = 0) const {
#if SUPPORT_GMP
if constexpr(is_mpz) {
Word r;
if (mpz_invert(r.get_mpz_t(), a.get_mpz_t(), n.get_mpz_t()) == 0) {
mpz_gcd(r.get_mpz_t(), a.get_mpz_t(), n.get_mpz_t());
throw InvError(r, a, n);
}
return r;
}
#endif
ASSERT(n > 0);
a = any_n_q == 0 ? mod(a, n) : any_n_q == 1 ? modNa(a) : any_n_q == 2 ? modQa(a) : 0;
auto [gcd, s, t] = EGCD(a, n);
if (gcd != 1)
throw InvError(gcd, a, n);
a = Word(SWord(n) + s);
a = any_n_q == 0 ? mod(a, n) : any_n_q == 1 ? modNa(a) : any_n_q == 2 ? modQa(a) : 0;
return a;
}
Word invN(Word a) const { return inv(a, N_, 1); }
Word invQ(Word a) const { return inv(a, q_, 2); }
ECPoint & operator += (ECPoint const & o) {
if (x_ == 0 && y_ == 0) {
*this = o;
return *this;
}
if (o.x_ == 0 && o.y_ == 0)
return *this;
Word const Px = x_, Py = y_, Qx = o.x_, Qy = o.y_;
Word s = 0;
if ((Adjust(Px, N_) == Adjust(Qx, o.N_)) && (Adjust(Py, N_) == Adjust(Qy, o.N_)))
s = modNn(DWord(modNn(DWord(Px) * Px * 3) + A_) * invN(Py * 2));
else
s = modNn(DWord(Py + 2 * N_ - Qy) * invN(Px + 2 * N_ - Qx));
x_ = modNn(DWord(s) * s + 4 * N_ - Px - Qx);
y_ = modNn(DWord(s) * (Px + 2 * N_ - x_) + 2 * N_ - Py);
return *this;
}
ECPoint operator + (ECPoint const & o) const {
ECPoint c = *this;
c += o;
return c;
}
template <typename TK>
ECPoint & operator *= (TK k) {
auto const ok = k;
ASSERT(k > 0);
if (k == 1)
return *this;
k -= 1;
auto r = *this, s = *this;
for (size_t icycle = 0;; ++icycle) {
if (ToU32<TK>(k & 1)) {
r += s;
if (k == 1)
break;
}
k >>= 1;
s += s;
}
if constexpr(0) {
auto r2 = *this;
for (u64 i = 1; i < ok; ++i)
r2 += *this;
ASSERT(r == r2);
}
*this = r;
return *this;
}
ECPoint operator * (Word k) const {
ECPoint r = *this;
r *= k;
return r;
}
bool operator == (ECPoint const & o) const {
return A_ == o.A_ && B_ == o.B_ && N_ == o.N_ && q_ == o.q_ &&
Adjust(x_, N_) == Adjust(o.x_, o.N_) && Adjust(y_, N_) == Adjust(o.y_, o.N_);
}
Word const & q() const { return q_; }
std::vector<Word> const & q_ps() const { return q_ps_; }
Word const & x() const { return x_; }
std::string ToStr() const {
return "X " + IntToStr(x_) + " Y " + IntToStr(y_) + " A " + IntToStr(A_) + " B " + IntToStr(B_) + " N " + IntToStr(N_) + " q " + IntToStr(q_);
}
private:
Word A_ = 0, B_ = 0, N_ = 0, q_ = 0, x_ = 0, y_ = 0, N_br_ = 0, Q_br_ = 0;
size_t N_bs_ = 0, Q_bs_ = 0;
std::vector<Word> q_ps_;
};
template <typename T = u32>
T GetPrime(size_t pos) {
static std::vector<T> primes = {2, 3};
static AtomicMutex mux;
if (!mux.Flag().test() && pos < primes.size())
return primes[pos];
auto lock = mux.Locker();
while (pos >= primes.size())
for (T p = primes.back() + 2;; p += 2) {
bool is_prime = true;
for (auto d: primes) {
if (d * d > p)
break;
if (p % d == 0) {
is_prime = false;
break;
}
}
if (is_prime) {
primes.push_back(p);
break;
}
}
return primes[pos];
}
std::pair<double, double> GetPrimeLog2(size_t pos) {
static std::vector<std::pair<double, double>> plogs;
static AtomicMutex mux;
if (!mux.Flag().test() && pos < plogs.size())
return plogs[pos];
auto lock = mux.Locker();
while (pos >= plogs.size()) {
auto const log2 = std::log2(GetPrime(pos));
plogs.push_back({log2, plogs.empty() ? log2 : plogs.back().second + log2});
}
return plogs[pos];
}
template <typename T>
std::pair<bool, bool> IsProbablyPrime_TrialDiv(T const n, u64 limit = u64(-1)) {
// https://en.wikipedia.org/wiki/Trial_division
if (n <= 10)
return {n == 2 || n == 3 || n == 5 || n == 7, true};
if ((n & 1) == 0)
return {false, true};
u64 d = 0;
for (d = 3; d < limit && d * d <= n; d += 2)
if (n % d == 0)
return {false, true};
return {true, d * d > n};
}
template <typename T>
bool IsProbablyPrime_Fermat(T const n, size_t ntrials = 32) {
// https://en.wikipedia.org/wiki/Fermat_primality_test
for (size_t trial = 0; trial < ntrials; ++trial)
if (PowMod<T>(RandRange<T>(n - 3) + 2, n - 1, n) != 1)
return false;
return true;
}
template <typename T>
bool IsProbablyPrime(T const n) {
if (n < (1 << 12))
return IsProbablyPrime_TrialDiv(n).first;
return IsProbablyPrime_Fermat(n);
}
template <typename T>
std::pair<std::vector<T>, std::vector<T>> Factor_TrialDiv_Odd(T n, u64 limit = u64(-1)) {
// https://en.wikipedia.org/wiki/Trial_division
thread_local std::vector<T> fs0;
auto & fs = fs0;
fs.clear();
while ((n & 1) == 0) {
n >>= 1;
fs.push_back(2);
}
T d = 0;
for (d = 3; d < limit && d * d <= n; d += 2)
while (n % d == 0) {
n /= d;
fs.push_back(d);
}
if (n == 1)
return {fs, {}};
if (d * d > n) {
fs.push_back(n);
return {fs, {}};
} else
return {fs, {n}};
}
template <typename To, typename From>
To ToT(From x) {
To y = 0;
for (size_t i = 0; x != 0; ++i) {
u32 w = ToU32<From>(x);
x >>= 32;
if ((i + 1) * 32 <= BiSize(To))
y |= To(w) << (i * 32);
}
return y;
}
template <typename T>
std::pair<std::vector<T>, std::vector<T>> Factor_TrialDiv_Primes(T n, u64 limit = u64(-1)) {
// https://en.wikipedia.org/wiki/Trial_division
thread_local std::vector<T> fs0;
auto & fs = fs0;
fs.clear();
while ((n & 1) == 0) {
n >>= 1;
fs.push_back(2);
}
u64 const n_limit = (n >> 64) == 0 ? ToT<u64, T>(n) : (u64(-1) >> 1);
u32 p = 0;
for (size_t i = 1;; ++i) {
p = GetPrime(i);
if (p >= limit || u64(p) * p > n_limit)
break;
while (n % p == 0) {
n /= p;
fs.push_back(p);
}
}
if (n == 1)
return {fs, {}};
if (u64(p) * p > n_limit) {
fs.push_back(n);
return {fs, {}};
} else
return {fs, {n}};
}
template <typename T>
T GCD(T a, T b) {
while (b != 0) {
std::swap(a, b);
b %= a;
}
return a;
}
std::string FloatToStr(double x, int prec = 7) {
std::ostringstream ss;
ss << std::fixed << std::setprecision(prec) << x;
return ss.str();
}
template <typename T>
std::pair<std::vector<T>, std::vector<T>> Factor_PollardRho(T n, u64 limit = u64(-1), size_t ntrials = 6) {
// https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm
using DT = typename DWordOf<T>::type;
if (n <= 1)
return {{}, {}};
if (IsProbablyPrime<T>(n))
return {{n}, {}};
auto f = [&n](T x){ return T((DT(x) * x + 1) % n); };
ASSERT(3 <= n);
size_t cnt = 0;
UniformIntDistr<T> distr(1, n - 2);
for (size_t itry = 0; itry < ntrials; ++itry) {
bool failed = false;
T x = distr(RNG());
u64 sum_cycles = 0;
for (u64 cycle = 1;; cycle <<= 1) {
T y = x, m = 1, xstart = x;
for (u64 i = 0, istart = 0; i < cycle; ++i) {
x = f(x);
m = T((DT(m) * (n + x - y)) % n);
++cnt;
if (cnt >= limit)
return {{}, {n}};
if (!(i + 1 - istart >= (1 << 7) || i + 1 >= cycle))
continue;
if (GCD<T>(n, m) == 1) {
istart = i + 1;
xstart = x;
continue;
}
T x2 = xstart;
for (u64 i2 = istart; i2 <= i; ++i2) {
x2 = f(x2);
auto const g = GCD<T>(n, n + x2 - y);
if (g == 1)
continue;
sum_cycles += i + 1;
{
auto const threshold = sum_cycles >= 100 ? 0.5 : 0.6;
ASSERT_MSG(std::log2(sum_cycles) < std::log2(ToDouble<T>(n)) * threshold, "n " + IntToStr(n) +
" (log2 " + std::to_string(std::log2(ToDouble<T>(n))) +
") cycles " + IntToStr(sum_cycles) + " (log2 " + std::to_string(std::log2(sum_cycles)) + ")");
}
if (g == n) {
failed = true;
break;
}
auto res0 = Factor_PollardRho<T>(g, limit, ntrials);
auto res1 = Factor_PollardRho<T>(n / g, limit, ntrials);
res0.first.insert(res0.first.end(), res1.first.begin(), res1.first.end());
res0.second.insert(res0.second.end(), res1.second.begin(), res1.second.end());
return std::move(res0);
}
if (failed)
break;
ASSERT(false);
}
sum_cycles += cycle;
if (failed)
break;
}
}
return {{}, {n}};
}
template <typename T>
std::pair<std::vector<T>, std::vector<T>> Factor_ECM(T const n, u64 limit = u64(-1), bool verbose = false) {
if (n <= 1)
return {{}, {}};
if (IsProbablyPrime(n))
return {{n}, {}};
using ECT = ECPoint<T>;
struct Entry {
ECT curve;
size_t done_primes = 0;
bool active = true;
};
size_t const bits = NumBits(n), nthr = std::thread::hardware_concurrency();
std::vector<Entry> curves;
size_t T_bisize = 0;
#if SUPPORT_GMP
if constexpr(std::is_same_v<T, mpz_class>)
T_bisize = (bits + 63) / 64 * 64;
else
#endif
T_bisize = BiSize(T);
T n_factored = n;
bool n_factored_changed = true;
std::set<T> factors;
std::mutex mux;
std::atomic<double> done_ops = 0;
std::vector<std::future<void>> asyncs;
size_t num_primes = 0;
double tim = Time();
for (size_t iiter = 0;; ++iiter) {
if (n_factored_changed && IsProbablyPrime(n_factored)) {
factors.insert(n_factored);
n_factored = 1;
}
if (n_factored == 1)
break;
tim = Time();
size_t const extra_primes = iiter == 0 ? 1 << 9 : [&]{
auto Prob = [&](size_t nprimes){
double x = double(bits) / GetPrimeLog2(nprimes).first;
return std::pow(x, -x);
};
auto Prob2 = [&](size_t ncurves, size_t nprimes){
return 1.0 - std::pow(1.0 - Prob(nprimes), ncurves);
};
auto Prob2G = [&](size_t ncurves, size_t nprimes){
#if SUPPORT_GMP
double x = double(bits) / GetPrimeLog2(nprimes).first;
mpf_class f;
f.set_prec(1 << 15);
f = x;
for (size_t i = 0; i < 4; ++i)
mpf_sqrt(f.get_mpf_t(), f.get_mpf_t());
mpf_pow_ui(f.get_mpf_t(), f.get_mpf_t(), u32(std::llround(x * 16.0)));
mpf_ui_div(f.get_mpf_t(), 1, f.get_mpf_t());
mpf_ui_sub(f.get_mpf_t(), 1, f.get_mpf_t());
mpf_pow_ui(f.get_mpf_t(), f.get_mpf_t(), u32(ncurves));
mpf_ui_sub(f.get_mpf_t(), 1, f.get_mpf_t());
return f;
#else
return Prob2(ncurves, nprimes);
#endif
};
auto Work = [&](size_t ncurves, size_t nprimes){
return GetPrimeLog2(nprimes).second * ncurves;
};
size_t maxi = 0, i = 0;
double w0 = Work(curves.size(), num_primes);
auto p0 = Prob2G(curves.size(), num_primes);
using P2GT = std::decay_t<decltype(p0)>;
P2GT maxv = 0;
for (i = 8; i <= (1 << 9); i += 8) {
size_t constexpr dc = 1;
double w1 = Work(curves.size() + dc, num_primes + i), wd = w1 - w0;
P2GT p1 = Prob2G(curves.size() + dc, num_primes + i), pd = p1 - p0;
/*
ASSERT_MSG(w0 < w1 && p0 <= p1,
"w0 " + FloatToStr(w0, 1) + " w1 " + FloatToStr(w1, 1) +
" p0 " + FloatToStr(p0, 6) + " p1 " + FloatToStr(p1, 6) + " (" +
std::to_string(p1) + ") i " + std::to_string(i));
*/
P2GT k = pd / wd;
ASSERT(k >= 0);
if (k >= maxv) {
maxv = k;
maxi = i;
}
if (maxi != 0 && maxi + 20 < i/* && maxv / k > 1.05*/)
break;
}
//ASSERT(i <= (1 << 12));
return maxi == 0 ? 1 << 5 : maxi;
}();
size_t const B_bits = std::min<size_t>(T_bisize - 1, 32), tot_bisize = T_bisize * 4,
task_bits = std::min<size_t>(tot_bisize - 8, 1 << 9);
ASSERT(B_bits < T_bisize);
for (size_t i = 0; i < (iiter == 0 ? 1 : 1); ++i)
curves.push_back(Entry{.curve = ECT::base_gen(size_t(-1), n)});
num_primes += extra_primes;
for (size_t ithr = 0; ithr < nthr; ++ithr)
asyncs.push_back(std::async(std::launch::async, [&, ithr]{
size_t const block = (curves.size() + nthr - 1) / nthr, begin = ithr * block,
end = std::min<size_t>((ithr + 1) * block, curves.size());
for (size_t icurve = begin; icurve < end; ++icurve) {
auto & curve = curves.at(icurve);
if (!curve.active)
continue;
while (curve.done_primes < num_primes) {
typename QWordOf<T>::type tot = 1;
double tot_lsum = 0;
size_t i = 0;
for (i = curve.done_primes; i < num_primes; ++i) {
auto const p = GetPrime<u64>(i);
auto const plog = GetPrimeLog2(i).first;
T mp = p;
double lsum = plog;
while (true) {
if (lsum + plog > B_bits)
break;
mp *= p;
lsum += plog;
}
tot_lsum += lsum;
tot *= mp;
if (tot_lsum + B_bits > task_bits) {
++i;
break;
}
}
curve.done_primes = i;
try {
done_ops += tot_lsum;
curve.curve *= tot;
} catch (typename ECT::InvError const & ex) {
ASSERT(ex.gcd > 1 && n % ex.gcd == 0);
if (ex.gcd == n)
continue;
else {
std::lock_guard<std::mutex> lock(mux);
factors.insert(ex.gcd);
while (n_factored % ex.gcd == 0)
n_factored /= ex.gcd;
n_factored_changed = true;
}
curve.active = false;
break;
}
}
}
}));
for (auto & e: asyncs)
e.get();
asyncs.clear();
if (verbose)
std::cout << "Curves " << std::setw(3) << curves.size() /*<< ", Curve " << curves.back().curve.ToStr()*/
<< ", Ops " << std::fixed << std::setprecision(3) << std::setw(8) << double(done_ops) / 1024
<< " Ki, ExtraPrimes " << std::setw(3) << extra_primes << ", Primes " << std::setw(6) << double(num_primes) / 1024
<< " Ki, IterTime " << std::setw(7) << (Time() - tim) << " sec" << std::endl;
if (done_ops >= limit)
break;
}
std::pair<std::vector<T>, std::vector<T>> res;
T nc = n;
for (auto f: factors) {
ASSERT(nc % f == 0);
while (nc % f == 0) {
res.first.push_back(f);
nc /= f;
}
}
if (nc != 1)
res.second.push_back(nc);
return res;
}
template <typename T>
void OutputFactors(T n, std::pair<std::vector<T>, std::vector<T>> const & fs, bool check = true) {
auto Sorted = [](auto v){
std::sort(v.begin(), v.end());
return v;
};
auto Out = [&](auto const & x){
for (auto f: Sorted(x))
std::cout << IntToStr(f) << " (2^" << std::fixed << std::setprecision(2)
<< std::log2(ToDouble<T>(f)) << (IsProbablyPrime<T>(f) ? ", prime)" : ", composite)") << ", ";
};
if (n != 0)
std::cout << "Num: " << IntToStr(n) << " (2^" << std::fixed << std::setprecision(2) << std::log2(ToDouble<T>(n)) << ")";
std::cout << (fs.first.empty() ? "" : "\nFactored: "); Out(fs.first);
std::cout << (fs.second.empty() ? "" : "\nUnFactored: "); Out(fs.second); std::cout << std::endl;
if (check) {
for (auto const & e0: {fs.first, fs.second})
for (auto const & e1: e0) {
ASSERT(n % e1 == 0);
n /= e1;
}
ASSERT(n == 1);
}
}
template <typename T>
std::pair<std::vector<T>, std::vector<T>> Factor(T const n, size_t sec512 = 3, bool verbose = false) {
using ResT = std::pair<std::vector<T>, std::vector<T>>;
double tt = Time(), t0 = 0, t1 = 0, t2 = 0;
ResT r0{}, r1{}, r2{};
t0 = Time();
r0 = Factor_TrialDiv_Primes<T>(n, std::llround((1 << 22) * sec512 * 0.1));
t0 = Time() - t0;
if (verbose) {
std::cout << "TrialDiv time " << std::fixed << std::setprecision(3) << t0 << " sec" << std::endl;
OutputFactors<T>(n, r0, false);
}
if (!r0.second.empty()) {
t1 = Time();
r1 = Factor_PollardRho<T>(r0.second.front(), std::llround((1 << 17) * sec512 * 0.2));
t1 = Time() - t1;
if (verbose) {
std::cout << "PollardRho time " << t1 << " sec" << std::endl;
OutputFactors<T>(r0.second.front(), r1, false);
}
r0.first.insert(r0.first.end(), r1.first.begin(), r1.first.end());
}
r0.second.clear();
for (auto e: r1.second) {
t2 = Time();
r2 = Factor_ECM<T>(e, std::llround((1 << 12) * sec512 * 0.7), true);
t2 = Time() - t2;
if (verbose) {
std::cout << "ECM time " << t2 << " sec" << std::endl;
OutputFactors<T>(e, r2, false);
}
r0.first.insert(r0.first.end(), r2.first.begin(), r2.first.end());
r0.second.insert(r0.second.end(), r2.second.begin(), r2.second.end());
}
tt = Time() - tt;
if (verbose) {
std::cout << "Final time " << std::fixed << std::setprecision(3) << tt << " sec" << std::endl;
OutputFactors<T>(n, r0, true);
}
return r0;
}
template <typename T>
static T RandPrimeRange(T begin, T end) {
ASSERT(begin < end);
while (true) {
T const p = (RandRange<T>(end - begin) + begin) | 1;
if (IsProbablyPrime<T>(p))
return p;
}
}
template <typename T>
static T RandPrime(size_t bits) {
using DT = typename DWordOf<T>::type;
return RandPrimeRange<T>(T(1) << (bits - 1), T((DT(1) << bits) - 1));
}
int main() {
try {
using T = mpz_class;
for (size_t i = 0; i < 8; ++i)
Factor<T>(RandBits<T>(2048), 60, true);
return 0;
T n = 1;
for (size_t bits: {4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72})
n *= RandPrime<T>(bits);
Factor<T>(n, 20, true);
return 0;
} catch (std::exception const & ex) {
std::cout << "Exception: " << ex.what() << std::endl;
return -1;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment