Skip to content

Instantly share code, notes, and snippets.

@suisen-cp
Last active November 22, 2023 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save suisen-cp/bd9664d4ccfec56c256632cd28d11e67 to your computer and use it in GitHub Desktop.
Save suisen-cp/bd9664d4ccfec56c256632cd28d11e67 to your computer and use it in GitHub Desktop.
factorial mod prime

各ファイルの先頭にある static constexpr int K = 9; の行で $K = \log _ 2 B$ を設定しているので、ここを書き換えて実行すると実験結果を再現できます。

static constexpr int K = 9;
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <stdexcept>
#include <vector>
namespace io {
int scan_int() {
int v;
if (::scanf("%d", &v) != 1) {
throw std::runtime_error{ "Fail to read." };
}
return v;
}
void print_int(int v) {
if (::printf("%d\n", v) < 0) {
throw std::runtime_error{ "Fail to write." };
}
}
}
#include <atcoder/modint>
#include <atcoder/convolution>
using mint = atcoder::modint998244353;
struct factorial {
factorial() = delete;
static mint fact(int n) {
ensure(n + 1);
return _fact[n];
}
static mint inv_fact(int n) {
ensure(n + 1);
return _inv_fact[n];
}
static void ensure(int size) {
const int curr_size = _fact.size();
if (size <= curr_size) return;
const int next_size = std::max(curr_size * 2, size);
_fact.resize(next_size);
_inv_fact.resize(next_size);
for (int i = curr_size; i < next_size; ++i) {
_fact[i] = _fact[i - 1] * i;
}
_inv_fact.back() = _fact.back().inv();
for (int i = next_size - 1; i > curr_size; --i) {
_inv_fact[i - 1] = _inv_fact[i] * i;
}
}
private:
static inline std::vector<mint> _fact{ 1 }, _inv_fact{ 1 };
};
/**
* Computes f(t),f(t+1),...,f(t+m-1) from f(0),f(1),...,f(n-1)
*/
std::vector<mint> shift_of_sampling_points(const std::vector<mint>& ys, mint t, int m) {
const int n = ys.size();
factorial::ensure(std::max(n, m));
std::vector<mint> b = [&] {
std::vector<mint> f(n), g(n);
for (int i = 0; i < n; ++i) {
f[i] = ys[i] * factorial::inv_fact(i);
g[i] = (i & 1 ? -1 : 1) * factorial::inv_fact(i);
}
std::vector<mint> b = atcoder::convolution(f, g);
b.resize(n);
return b;
}();
std::vector<mint> e = [&] {
std::vector<mint> c(n);
mint prd = 1;
std::reverse(b.begin(), b.end());
for (int i = 0; i < n; ++i) {
b[i] *= factorial::fact(n - i - 1);
c[i] = prd * factorial::inv_fact(i);
prd *= t - i;
}
std::vector<mint> e = atcoder::convolution(b, c);
e.resize(n);
return e;
}();
std::reverse(e.begin(), e.end());
for (int i = 0; i < n; ++i) {
e[i] *= factorial::inv_fact(i);
}
std::vector<mint> f(m);
for (int i = 0; i < m; ++i) {
f[i] = factorial::inv_fact(i);
}
std::vector<mint> res = atcoder::convolution(e, f);
res.resize(m);
for (int i = 0; i < m; ++i) {
res[i] *= factorial::fact(i);
}
return res;
}
/**
* Computes (iB)! for i=0,1,...,floor(P/B) where B=2^15
*/
struct PrecalcFactorial {
static constexpr int LOG_BLOCK_SIZE = K;
static constexpr int BLOCK_SIZE = 1 << LOG_BLOCK_SIZE;
static constexpr int BLOCK_NUM = mint::mod() >> LOG_BLOCK_SIZE;
private:
static inline std::vector<mint> _fact = [] {
std::vector<mint> f{ 1 };
f.reserve(BLOCK_SIZE);
for (int i = 0; i < LOG_BLOCK_SIZE; ++i) {
std::vector<mint> g = shift_of_sampling_points(f, 1 << i, 3 << i);
const auto get = [&](int j) { return j < (1 << i) ? f[j] : g[j - (1 << i)]; };
f.resize(2 << i);
for (int j = 0; j < 2 << i; ++j) {
// (2*j+1)*2^i <= 2^(2*_log_b) + 2^(_log_b-1) < 2^31 holds if _log_b <= 15
f[j] = get(2 * j) * get(2 * j + 1) * ((2 * j + 1) << i);
}
}
// f_B(x) = (x+1) * ... * (x+B-1)
if (BLOCK_NUM > BLOCK_SIZE) {
std::vector<mint> g = shift_of_sampling_points(f, BLOCK_SIZE, BLOCK_NUM - BLOCK_SIZE);
std::move(g.begin(), g.end(), std::back_inserter(f));
} else {
f.resize(BLOCK_NUM);
}
for (int i = 0; i < BLOCK_NUM; ++i) {
f[i] *= mint(i + 1) * BLOCK_SIZE;
}
// f[i] = (i*B + 1) * ... * (i*B + B)
f.insert(f.begin(), 1);
for (int i = 1; i <= BLOCK_NUM; ++i) {
f[i] *= f[i - 1];
}
return f;
}();
public:
PrecalcFactorial() = delete;
// (k * BLOCK_SIZE)!
static mint kth_block_fact(int k) {
return k <= BLOCK_NUM ? _fact[k] : 0;
}
};
std::vector<mint> solve(const std::vector<int>& ns) {
constexpr int block_size = PrecalcFactorial::BLOCK_SIZE;
std::vector<mint> ans;
ans.reserve(ns.size());
for (int n : ns) {
mint res;
int q = n / block_size, r = n % block_size;
if (2 * r <= block_size) {
res = PrecalcFactorial::kth_block_fact(q);
for (int i = 0; i < r; ++i) {
res *= mint::raw(n - i);
}
} else if (q != PrecalcFactorial::BLOCK_NUM) {
res = PrecalcFactorial::kth_block_fact(q + 1);
mint den = 1;
for (int i = 1; i <= block_size - r; ++i) {
den *= mint::raw(n + i);
}
res /= den;
} else {
// Wilson's theorem
res = mint::mod() - 1;
mint den = 1;
for (int i = mint::mod() - 1; i > n; --i) {
den *= mint::raw(i);
}
res /= den;
}
ans.push_back(res);
}
return ans;
}
int main() {
const int t = io::scan_int();
std::vector<int> ns(t);
for (auto&& n : ns) {
n = io::scan_int();
}
const std::vector<mint> ans = solve(ns);
for (const auto& v : ans) {
io::print_int(v.val());
}
}
static constexpr int K = 9;
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdint>
#include <stdexcept>
#include <vector>
namespace io {
int scan_int() {
int v;
if (::scanf("%d", &v) != 1) {
throw std::runtime_error{ "Fail to read." };
}
return v;
}
void print_int(int v) {
if (::printf("%d\n", v) < 0) {
throw std::runtime_error{ "Fail to write." };
}
}
}
#include <atcoder/modint>
#include <atcoder/convolution>
using mint = atcoder::modint998244353;
using polynomial = std::vector<mint>;
using formal_power_series = std::vector<mint>;
formal_power_series fps_inv(const formal_power_series& f, int n) {
assert(f.size() and f[0] != 0);
formal_power_series g{ f[0].inv() };
for (int k = 1; k < n; k *= 2) {
std::vector<mint> f_fft(f.begin(), f.begin() + std::min<int>(2 * k, f.size()));
std::vector<mint> g_fft(g.begin(), g.end());
f_fft.resize(2 * k);
g_fft.resize(2 * k);
atcoder::internal::butterfly(f_fft);
atcoder::internal::butterfly(g_fft);
std::vector<mint> fg(2 * k);
for (int i = 0; i < 2 * k; ++i) {
fg[i] = f_fft[i] * g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
fg.erase(fg.begin(), fg.begin() + k);
fg.resize(2 * k);
atcoder::internal::butterfly(fg);
for (int i = 0; i < 2 * k; ++i) {
fg[i] *= g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
const mint iz = mint(2 * k).inv(), c = -iz * iz;
g.resize(2 * k);
for (int i = 0; i < k; ++i) {
g[k + i] = fg[i] * c;
}
}
g.resize(n);
return g;
}
polynomial operator+(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] += g[i];
}
return res;
}
polynomial operator-(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] -= g[i];
}
return res;
}
polynomial operator*(const polynomial& f, const polynomial& g) {
return atcoder::convolution(f, g);
}
polynomial operator/(polynomial f, polynomial g) {
while (f.size() and f.back() == 0) f.pop_back();
while (g.size() and g.back() == 0) g.pop_back();
const int fd = f.size() - 1, gd = g.size() - 1;
assert(gd >= 0);
if (fd < gd) {
return {};
}
if (gd == 0) {
mint inv_g0 = g[0].inv();
for (auto&& e : f) e *= inv_g0;
return f;
}
std::reverse(f.begin(), f.end());
std::reverse(g.begin(), g.end());
const int qd = fd - gd;
f.resize(qd + 1);
polynomial q = f * fps_inv(g, qd + 1);
q.resize(qd + 1);
std::reverse(q.begin(), q.end());
return q;
}
polynomial operator%(const polynomial& f, const polynomial& g) {
polynomial q = f / g, r = f - g * q;
while (r.size() and r.back() == 0) r.pop_back();
return r;
}
mint eval(const polynomial& f, const mint& x) {
const int n = f.size();
mint y = 0;
for (int i = n - 1; i >= 0; --i) {
y = uint64_t(y.val()) * x.val() + f[i].val();
}
return y;
}
std::vector<mint> multipoint_evaluation(const polynomial& f, const std::vector<mint>& xs) {
const int n = xs.size();
if (n == 0) {
return {};
}
if (f.size() <= 60) {
std::vector<mint> ys(n);
for (int i = 0; i < n; ++i) {
ys[i] = eval(f, xs[i]);
}
return ys;
}
const int m = [n] {
int m = 1;
while (m < n) m <<= 1;
return m;
}();
std::vector<polynomial> subproduct_tree(2 * m);
for (int i = 0; i < n; ++i) {
subproduct_tree[m + i] = polynomial{ -xs[i], 1 };
}
for (int i = n; i < m; ++i) {
subproduct_tree[m + i] = polynomial{ 1 };
}
for (int i = m - 1; i; --i) {
subproduct_tree[i] = subproduct_tree[2 * i + 0] * subproduct_tree[2 * i + 1];
}
std::vector<polynomial> f_mod_subproduct(2 * m);
f_mod_subproduct[1] = f % subproduct_tree[1];
for (int i = 1; i < m; ++i) {
f_mod_subproduct[2 * i + 0] = f_mod_subproduct[i] % subproduct_tree[2 * i + 0];
f_mod_subproduct[2 * i + 1] = f_mod_subproduct[i] % subproduct_tree[2 * i + 1];
}
std::vector<mint> ys(n);
for (int i = 0; i < n; ++i) {
ys[i] = f_mod_subproduct[m + i].empty() ? 0 : f_mod_subproduct[m + i].front();
}
return ys;
}
struct factorial {
factorial() = delete;
static mint fact(int n) {
ensure(n + 1);
return _fact[n];
}
static mint inv_fact(int n) {
ensure(n + 1);
return _inv_fact[n];
}
static void ensure(int size) {
const int curr_size = _fact.size();
if (size <= curr_size) return;
const int next_size = std::max(curr_size * 2, size);
_fact.resize(next_size);
_inv_fact.resize(next_size);
for (int i = curr_size; i < next_size; ++i) {
_fact[i] = _fact[i - 1] * i;
}
_inv_fact.back() = _fact.back().inv();
for (int i = next_size - 1; i > curr_size; --i) {
_inv_fact[i - 1] = _inv_fact[i] * i;
}
}
private:
static inline std::vector<mint> _fact{ 1 }, _inv_fact{ 1 };
};
/**
* Computes f(t),f(t+1),...,f(t+m-1) from f(0),f(1),...,f(n-1)
*/
std::vector<mint> shift_of_sampling_points(const std::vector<mint>& ys, mint t, int m) {
const int n = ys.size();
factorial::ensure(std::max(n, m));
std::vector<mint> b = [&] {
std::vector<mint> f(n), g(n);
for (int i = 0; i < n; ++i) {
f[i] = ys[i] * factorial::inv_fact(i);
g[i] = (i & 1 ? -1 : 1) * factorial::inv_fact(i);
}
std::vector<mint> b = atcoder::convolution(f, g);
b.resize(n);
return b;
}();
std::vector<mint> e = [&] {
std::vector<mint> c(n);
mint prd = 1;
std::reverse(b.begin(), b.end());
for (int i = 0; i < n; ++i) {
b[i] *= factorial::fact(n - i - 1);
c[i] = prd * factorial::inv_fact(i);
prd *= t - i;
}
std::vector<mint> e = atcoder::convolution(b, c);
e.resize(n);
return e;
}();
std::reverse(e.begin(), e.end());
for (int i = 0; i < n; ++i) {
e[i] *= factorial::inv_fact(i);
}
std::vector<mint> f(m);
for (int i = 0; i < m; ++i) {
f[i] = factorial::inv_fact(i);
}
std::vector<mint> res = atcoder::convolution(e, f);
res.resize(m);
for (int i = 0; i < m; ++i) {
res[i] *= factorial::fact(i);
}
return res;
}
/**
* Computes (iB)! for i=0,1,...,floor(P/B) where B=2^15
*/
struct PrecalcFactorial {
static constexpr int LOG_BLOCK_SIZE = K;
static constexpr int BLOCK_SIZE = 1 << LOG_BLOCK_SIZE;
static constexpr int BLOCK_NUM = mint::mod() >> LOG_BLOCK_SIZE;
private:
static inline std::vector<mint> _fact = [] {
std::vector<mint> f{ 1 };
f.reserve(BLOCK_SIZE);
for (int i = 0; i < LOG_BLOCK_SIZE; ++i) {
std::vector<mint> g = shift_of_sampling_points(f, 1 << i, 3 << i);
const auto get = [&](int j) { return j < (1 << i) ? f[j] : g[j - (1 << i)]; };
f.resize(2 << i);
for (int j = 0; j < 2 << i; ++j) {
// (2*j+1)*2^i <= 2^(2*_log_b) + 2^(_log_b-1) < 2^31 holds if _log_b <= 15
f[j] = get(2 * j) * get(2 * j + 1) * ((2 * j + 1) << i);
}
}
// f_B(x) = (x+1) * ... * (x+B-1)
if (BLOCK_NUM > BLOCK_SIZE) {
std::vector<mint> g = shift_of_sampling_points(f, BLOCK_SIZE, BLOCK_NUM - BLOCK_SIZE);
std::move(g.begin(), g.end(), std::back_inserter(f));
} else {
f.resize(BLOCK_NUM);
}
for (int i = 0; i < BLOCK_NUM; ++i) {
f[i] *= mint(i + 1) * BLOCK_SIZE;
}
// f[i] = (i*B + 1) * ... * (i*B + B)
f.insert(f.begin(), 1);
for (int i = 1; i <= BLOCK_NUM; ++i) {
f[i] *= f[i - 1];
}
return f;
}();
public:
PrecalcFactorial() = delete;
// (k * BLOCK_SIZE)!
static mint kth_block_fact(int k) {
return k <= BLOCK_NUM ? _fact[k] : 0;
}
};
// x(x-1)...(x-n+1)
polynomial stirling_number1_signed(int n) {
factorial::ensure(n);
int l = 0;
while ((n >> l) != 0) ++l;
polynomial a{ 1 };
int m = 0;
while (l--) {
polynomial f(m + 1), g(m + 1);
mint powm = 1;
for (int i = 0; i <= m; ++i, powm *= m) {
f[i] = powm * factorial::inv_fact(i);
g[i] = a[i] * factorial::fact(m - i);
}
f = f * g;
f.resize(m + 1);
for (int i = 0; i <= m; ++i) {
f[i] *= factorial::inv_fact(m - i);
}
a = a * f;
m *= 2;
a.resize(m + 1);
if ((n >> l) & 1) {
a.push_back(0);
for (int i = m; i > 0; --i) {
a[i] += m * a[i - 1];
}
++m;
}
}
for (int i = 0; i <= n; ++i) {
if (i & 1) a[i] = -a[i];
}
std::reverse(a.begin(), a.end());
return a;
}
std::vector<mint> solve(const std::vector<int>& ns) {
constexpr int log_block_size = PrecalcFactorial::LOG_BLOCK_SIZE;
constexpr int block_size = PrecalcFactorial::BLOCK_SIZE;
const int query_num = ns.size();
std::vector<std::vector<std::pair<int, int>>> eval_points(log_block_size);
std::vector<mint> ans(query_num);
for (int i = 0; i < query_num; ++i) if (ns[i] < mint::mod()) {
const int q = ns[i] / block_size, r = ns[i] % block_size;
ans[i] = PrecalcFactorial::kth_block_fact(q);
if (r) {
int n = ns[i];
for (int bit = 0; bit < log_block_size; ++bit) {
if ((r >> bit) & 1) {
eval_points[bit].emplace_back(n, i);
n -= 1 << bit;
}
}
}
}
for (int bit = 0; bit < log_block_size; ++bit) {
const int point_num = eval_points[bit].size();
// x(x-1)...(x-2^bit+1)
const polynomial f = stirling_number1_signed(1 << bit);
for (int l = 0; l < point_num; l += 1 << bit) {
int r = std::min(l + (1 << bit), point_num);
std::vector<mint> xs;
for (int i = l; i < r; ++i) {
const int n = eval_points[bit][i].first;
xs.push_back(n);
}
const std::vector<mint> ys = multipoint_evaluation(f, xs);
for (int i = l; i < r; ++i) {
const int query_id = eval_points[bit][i].second;
ans[query_id] *= ys[i - l];
}
}
}
return ans;
}
int main() {
const int t = io::scan_int();
std::vector<int> ns(t);
for (auto&& n : ns) {
n = io::scan_int();
}
const std::vector<mint> ans = solve(ns);
for (const auto& v : ans) {
io::print_int(v.val());
}
}
static constexpr int K = 9;
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdint>
#include <stdexcept>
#include <vector>
namespace io {
int scan_int() {
int v;
if (::scanf("%d", &v) != 1) {
throw std::runtime_error{ "Fail to read." };
}
return v;
}
void print_int(int v) {
if (::printf("%d\n", v) < 0) {
throw std::runtime_error{ "Fail to write." };
}
}
}
#include <atcoder/modint>
#include <atcoder/convolution>
using mint = atcoder::modint998244353;
using polynomial = std::vector<mint>;
using formal_power_series = std::vector<mint>;
formal_power_series fps_inv(const formal_power_series& f, int n) {
assert(f.size() and f[0] != 0);
formal_power_series g{ f[0].inv() };
for (int k = 1; k < n; k *= 2) {
std::vector<mint> f_fft(f.begin(), f.begin() + std::min<int>(2 * k, f.size()));
std::vector<mint> g_fft(g.begin(), g.end());
f_fft.resize(2 * k);
g_fft.resize(2 * k);
atcoder::internal::butterfly(f_fft);
atcoder::internal::butterfly(g_fft);
std::vector<mint> fg(2 * k);
for (int i = 0; i < 2 * k; ++i) {
fg[i] = f_fft[i] * g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
fg.erase(fg.begin(), fg.begin() + k);
fg.resize(2 * k);
atcoder::internal::butterfly(fg);
for (int i = 0; i < 2 * k; ++i) {
fg[i] *= g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
const mint iz = mint(2 * k).inv(), c = -iz * iz;
g.resize(2 * k);
for (int i = 0; i < k; ++i) {
g[k + i] = fg[i] * c;
}
}
g.resize(n);
return g;
}
polynomial operator+(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] += g[i];
}
return res;
}
polynomial operator-(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] -= g[i];
}
return res;
}
polynomial operator*(const polynomial& f, const polynomial& g) {
return atcoder::convolution(f, g);
}
polynomial operator/(polynomial f, polynomial g) {
while (f.size() and f.back() == 0) f.pop_back();
while (g.size() and g.back() == 0) g.pop_back();
const int fd = f.size() - 1, gd = g.size() - 1;
assert(gd >= 0);
if (fd < gd) {
return {};
}
if (gd == 0) {
mint inv_g0 = g[0].inv();
for (auto&& e : f) e *= inv_g0;
return f;
}
std::reverse(f.begin(), f.end());
std::reverse(g.begin(), g.end());
const int qd = fd - gd;
f.resize(qd + 1);
polynomial q = f * fps_inv(g, qd + 1);
q.resize(qd + 1);
std::reverse(q.begin(), q.end());
return q;
}
polynomial operator%(const polynomial& f, const polynomial& g) {
polynomial q = f / g, r = f - g * q;
while (r.size() and r.back() == 0) r.pop_back();
return r;
}
mint eval(const polynomial& f, const mint& x) {
const int n = f.size();
mint y = 0;
for (int i = n - 1; i >= 0; --i) {
y = uint64_t(y.val()) * x.val() + f[i].val();
}
return y;
}
std::vector<mint> middle_product(const std::vector<mint>& a, const std::vector<mint>& b) {
const int siz_a = a.size(), siz_b = b.size();
assert(siz_a >= siz_b and siz_b);
if (std::min(siz_b, siz_a - siz_b + 1) <= 60) {
std::vector<mint> res(siz_a - siz_b + 1);
for (int i = 0; i <= siz_a - siz_b; ++i) {
for (int j = 0; j < siz_b; ++j) {
res[i] += b[j] * a[i + j];
}
}
return res;
}
std::vector<mint> res = atcoder::convolution(a, std::vector<mint>(b.rbegin(), b.rend()));
res.resize(siz_a);
res.erase(res.begin(), res.begin() + siz_b - 1);
return res;
}
std::vector<mint> multipoint_evaluation(const polynomial& f, const std::vector<mint> &xs) {
const int n = f.size(), m = xs.size();
if (m == 0) {
return {};
}
if (f.size() <= 60) {
std::vector<mint> ys(n);
for (int i = 0; i < n; ++i) {
ys[i] = eval(f, xs[i]);
}
return ys;
}
int k = 1;
while (k < m) k *= 2;
std::vector<std::vector<mint>> t(2 * k);
for (int i = 0; i < m; ++i) {
t[k + i] = { 1, -xs[i] };
}
for (int i = m; i < k; ++i) {
t[k + i] = { 1, 0 };
}
for (int i = k - 1; i; --i) {
t[i] = t[2 * i] * t[2 * i + 1];
}
polynomial f2 = f;
f2.resize(2 * n - 1);
t[1] = middle_product(f2, fps_inv(t[1], n));
t[1].resize(k);
for (int i = 1; i < k; ++i) {
std::vector<mint> tr = middle_product(t[i], t[2 * i + 0]);
std::vector<mint> tl = middle_product(t[i], t[2 * i + 1]);
t[2 * i + 0] = std::move(tl);
t[2 * i + 1] = std::move(tr);
}
std::vector<mint> ys(m);
for (int i = 0; i < m; ++i) {
ys[i] = t[k + i].empty() ? 0 : t[k + i].front();
}
return ys;
}
struct factorial {
factorial() = delete;
static mint fact(int n) {
ensure(n + 1);
return _fact[n];
}
static mint inv_fact(int n) {
ensure(n + 1);
return _inv_fact[n];
}
static void ensure(int size) {
const int curr_size = _fact.size();
if (size <= curr_size) return;
const int next_size = std::max(curr_size * 2, size);
_fact.resize(next_size);
_inv_fact.resize(next_size);
for (int i = curr_size; i < next_size; ++i) {
_fact[i] = _fact[i - 1] * i;
}
_inv_fact.back() = _fact.back().inv();
for (int i = next_size - 1; i > curr_size; --i) {
_inv_fact[i - 1] = _inv_fact[i] * i;
}
}
private:
static inline std::vector<mint> _fact{ 1 }, _inv_fact{ 1 };
};
/**
* Computes f(t),f(t+1),...,f(t+m-1) from f(0),f(1),...,f(n-1)
*/
std::vector<mint> shift_of_sampling_points(const std::vector<mint>& ys, mint t, int m) {
const int n = ys.size();
factorial::ensure(std::max(n, m));
std::vector<mint> b = [&] {
std::vector<mint> f(n), g(n);
for (int i = 0; i < n; ++i) {
f[i] = ys[i] * factorial::inv_fact(i);
g[i] = (i & 1 ? -1 : 1) * factorial::inv_fact(i);
}
std::vector<mint> b = atcoder::convolution(f, g);
b.resize(n);
return b;
}();
std::vector<mint> e = [&] {
std::vector<mint> c(n);
mint prd = 1;
std::reverse(b.begin(), b.end());
for (int i = 0; i < n; ++i) {
b[i] *= factorial::fact(n - i - 1);
c[i] = prd * factorial::inv_fact(i);
prd *= t - i;
}
std::vector<mint> e = atcoder::convolution(b, c);
e.resize(n);
return e;
}();
std::reverse(e.begin(), e.end());
for (int i = 0; i < n; ++i) {
e[i] *= factorial::inv_fact(i);
}
std::vector<mint> f(m);
for (int i = 0; i < m; ++i) {
f[i] = factorial::inv_fact(i);
}
std::vector<mint> res = atcoder::convolution(e, f);
res.resize(m);
for (int i = 0; i < m; ++i) {
res[i] *= factorial::fact(i);
}
return res;
}
/**
* Computes (iB)! for i=0,1,...,floor(P/B) where B=2^15
*/
struct PrecalcFactorial {
static constexpr int LOG_BLOCK_SIZE = K;
static constexpr int BLOCK_SIZE = 1 << LOG_BLOCK_SIZE;
static constexpr int BLOCK_NUM = mint::mod() >> LOG_BLOCK_SIZE;
private:
static inline std::vector<mint> _fact = [] {
std::vector<mint> f{ 1 };
f.reserve(BLOCK_SIZE);
for (int i = 0; i < LOG_BLOCK_SIZE; ++i) {
std::vector<mint> g = shift_of_sampling_points(f, 1 << i, 3 << i);
const auto get = [&](int j) { return j < (1 << i) ? f[j] : g[j - (1 << i)]; };
f.resize(2 << i);
for (int j = 0; j < 2 << i; ++j) {
// (2*j+1)*2^i <= 2^(2*_log_b) + 2^(_log_b-1) < 2^31 holds if _log_b <= 15
f[j] = get(2 * j) * get(2 * j + 1) * ((2 * j + 1) << i);
}
}
// f_B(x) = (x+1) * ... * (x+B-1)
if (BLOCK_NUM > BLOCK_SIZE) {
std::vector<mint> g = shift_of_sampling_points(f, BLOCK_SIZE, BLOCK_NUM - BLOCK_SIZE);
std::move(g.begin(), g.end(), std::back_inserter(f));
} else {
f.resize(BLOCK_NUM);
}
for (int i = 0; i < BLOCK_NUM; ++i) {
f[i] *= mint(i + 1) * BLOCK_SIZE;
}
// f[i] = (i*B + 1) * ... * (i*B + B)
f.insert(f.begin(), 1);
for (int i = 1; i <= BLOCK_NUM; ++i) {
f[i] *= f[i - 1];
}
return f;
}();
public:
PrecalcFactorial() = delete;
// (k * BLOCK_SIZE)!
static mint kth_block_fact(int k) {
return k <= BLOCK_NUM ? _fact[k] : 0;
}
};
// x(x-1)...(x-n+1)
polynomial stirling_number1_signed(int n) {
factorial::ensure(n);
int l = 0;
while ((n >> l) != 0) ++l;
polynomial a{ 1 };
int m = 0;
while (l--) {
polynomial f(m + 1), g(m + 1);
mint powm = 1;
for (int i = 0; i <= m; ++i, powm *= m) {
f[i] = powm * factorial::inv_fact(i);
g[i] = a[i] * factorial::fact(m - i);
}
f = f * g;
f.resize(m + 1);
for (int i = 0; i <= m; ++i) {
f[i] *= factorial::inv_fact(m - i);
}
a = a * f;
m *= 2;
a.resize(m + 1);
if ((n >> l) & 1) {
a.push_back(0);
for (int i = m; i > 0; --i) {
a[i] += m * a[i - 1];
}
++m;
}
}
for (int i = 0; i <= n; ++i) {
if (i & 1) a[i] = -a[i];
}
std::reverse(a.begin(), a.end());
return a;
}
std::vector<mint> solve(const std::vector<int>& ns) {
constexpr int log_block_size = PrecalcFactorial::LOG_BLOCK_SIZE;
constexpr int block_size = PrecalcFactorial::BLOCK_SIZE;
const int query_num = ns.size();
std::vector<std::vector<std::pair<int, int>>> eval_points(log_block_size);
std::vector<mint> ans(query_num);
for (int i = 0; i < query_num; ++i) if (ns[i] < mint::mod()) {
const int q = ns[i] / block_size, r = ns[i] % block_size;
ans[i] = PrecalcFactorial::kth_block_fact(q);
if (r) {
int n = ns[i];
for (int bit = 0; bit < log_block_size; ++bit) {
if ((r >> bit) & 1) {
eval_points[bit].emplace_back(n, i);
n -= 1 << bit;
}
}
}
}
for (int bit = 0; bit < log_block_size; ++bit) {
const int point_num = eval_points[bit].size();
// x(x-1)...(x-2^bit+1)
const polynomial f = stirling_number1_signed(1 << bit);
for (int l = 0; l < point_num; l += 1 << bit) {
int r = std::min(l + (1 << bit), point_num);
std::vector<mint> xs;
for (int i = l; i < r; ++i) {
const int n = eval_points[bit][i].first;
xs.push_back(n);
}
const std::vector<mint> ys = multipoint_evaluation(f, xs);
for (int i = l; i < r; ++i) {
const int query_id = eval_points[bit][i].second;
ans[query_id] *= ys[i - l];
}
}
}
return ans;
}
int main() {
const int t = io::scan_int();
std::vector<int> ns(t);
for (auto&& n : ns) {
n = io::scan_int();
}
const std::vector<mint> ans = solve(ns);
for (const auto& v : ans) {
io::print_int(v.val());
}
}
static constexpr int K = 9;
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <stdexcept>
#include <vector>
namespace io {
int scan_int() {
int v;
if (::scanf("%d", &v) != 1) {
throw std::runtime_error{ "Fail to read." };
}
return v;
}
void print_int(int v) {
if (::printf("%d\n", v) < 0) {
throw std::runtime_error{ "Fail to write." };
}
}
}
#include <atcoder/modint>
#include <atcoder/convolution>
using mint = atcoder::modint998244353;
using polynomial = std::vector<mint>;
using formal_power_series = std::vector<mint>;
formal_power_series fps_inv(const formal_power_series& f, int n) {
assert(f.size() and f[0] != 0);
formal_power_series g{ f[0].inv() };
for (int k = 1; k < n; k *= 2) {
std::vector<mint> f_fft(f.begin(), f.begin() + std::min<int>(2 * k, f.size()));
std::vector<mint> g_fft(g.begin(), g.end());
f_fft.resize(2 * k);
g_fft.resize(2 * k);
atcoder::internal::butterfly(f_fft);
atcoder::internal::butterfly(g_fft);
std::vector<mint> fg(2 * k);
for (int i = 0; i < 2 * k; ++i) {
fg[i] = f_fft[i] * g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
fg.erase(fg.begin(), fg.begin() + k);
fg.resize(2 * k);
atcoder::internal::butterfly(fg);
for (int i = 0; i < 2 * k; ++i) {
fg[i] *= g_fft[i];
}
atcoder::internal::butterfly_inv(fg);
const mint iz = mint(2 * k).inv(), c = -iz * iz;
g.resize(2 * k);
for (int i = 0; i < k; ++i) {
g[k + i] = fg[i] * c;
}
}
g.resize(n);
return g;
}
polynomial operator+(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] += g[i];
}
return res;
}
polynomial operator-(const polynomial& f, const polynomial& g) {
const int siz_f = f.size(), siz_g = g.size();
polynomial res = f;
if (siz_f < siz_g) {
res.resize(siz_g);
}
for (int i = 0; i < siz_g; ++i) {
res[i] -= g[i];
}
return res;
}
polynomial operator*(const polynomial& f, const polynomial& g) {
return atcoder::convolution(f, g);
}
polynomial operator/(polynomial f, polynomial g) {
while (f.size() and f.back() == 0) f.pop_back();
while (g.size() and g.back() == 0) g.pop_back();
const int fd = f.size() - 1, gd = g.size() - 1;
assert(gd >= 0);
if (fd < gd) {
return {};
}
if (gd == 0) {
mint inv_g0 = g[0].inv();
for (auto&& e : f) e *= inv_g0;
return f;
}
std::reverse(f.begin(), f.end());
std::reverse(g.begin(), g.end());
const int qd = fd - gd;
f.resize(qd + 1);
polynomial q = f * fps_inv(g, qd + 1);
q.resize(qd + 1);
std::reverse(q.begin(), q.end());
return q;
}
polynomial operator%(const polynomial& f, const polynomial& g) {
polynomial q = f / g, r = f - g * q;
while (r.size() and r.back() == 0) r.pop_back();
return r;
}
struct factorial {
factorial() = delete;
static mint fact(int n) {
ensure(n + 1);
return _fact[n];
}
static mint inv_fact(int n) {
ensure(n + 1);
return _inv_fact[n];
}
static void ensure(int size) {
const int curr_size = _fact.size();
if (size <= curr_size) return;
const int next_size = std::max(curr_size * 2, size);
_fact.resize(next_size);
_inv_fact.resize(next_size);
for (int i = curr_size; i < next_size; ++i) {
_fact[i] = _fact[i - 1] * i;
}
_inv_fact.back() = _fact.back().inv();
for (int i = next_size - 1; i > curr_size; --i) {
_inv_fact[i - 1] = _inv_fact[i] * i;
}
}
private:
static inline std::vector<mint> _fact{ 1 }, _inv_fact{ 1 };
};
/**
* Computes f(t),f(t+1),...,f(t+m-1) from f(0),f(1),...,f(n-1)
*/
std::vector<mint> shift_of_sampling_points(const std::vector<mint>& ys, mint t, int m) {
const int n = ys.size();
factorial::ensure(std::max(n, m));
std::vector<mint> b = [&] {
std::vector<mint> f(n), g(n);
for (int i = 0; i < n; ++i) {
f[i] = ys[i] * factorial::inv_fact(i);
g[i] = (i & 1 ? -1 : 1) * factorial::inv_fact(i);
}
std::vector<mint> b = atcoder::convolution(f, g);
b.resize(n);
return b;
}();
std::vector<mint> e = [&] {
std::vector<mint> c(n);
mint prd = 1;
std::reverse(b.begin(), b.end());
for (int i = 0; i < n; ++i) {
b[i] *= factorial::fact(n - i - 1);
c[i] = prd * factorial::inv_fact(i);
prd *= t - i;
}
std::vector<mint> e = atcoder::convolution(b, c);
e.resize(n);
return e;
}();
std::reverse(e.begin(), e.end());
for (int i = 0; i < n; ++i) {
e[i] *= factorial::inv_fact(i);
}
std::vector<mint> f(m);
for (int i = 0; i < m; ++i) {
f[i] = factorial::inv_fact(i);
}
std::vector<mint> res = atcoder::convolution(e, f);
res.resize(m);
for (int i = 0; i < m; ++i) {
res[i] *= factorial::fact(i);
}
return res;
}
/**
* Computes (iB)! for i=0,1,...,floor(P/B) where B=2^15
*/
struct PrecalcFactorial {
static constexpr int LOG_BLOCK_SIZE = K;
static constexpr int BLOCK_SIZE = 1 << LOG_BLOCK_SIZE;
static constexpr int BLOCK_NUM = mint::mod() >> LOG_BLOCK_SIZE;
private:
static inline std::vector<mint> _fact = [] {
std::vector<mint> f{ 1 };
f.reserve(BLOCK_SIZE);
for (int i = 0; i < LOG_BLOCK_SIZE; ++i) {
std::vector<mint> g = shift_of_sampling_points(f, 1 << i, 3 << i);
const auto get = [&](int j) { return j < (1 << i) ? f[j] : g[j - (1 << i)]; };
f.resize(2 << i);
for (int j = 0; j < 2 << i; ++j) {
// (2*j+1)*2^i <= 2^(2*_log_b) + 2^(_log_b-1) < 2^31 holds if _log_b <= 15
f[j] = get(2 * j) * get(2 * j + 1) * ((2 * j + 1) << i);
}
}
// f_B(x) = (x+1) * ... * (x+B-1)
if (BLOCK_NUM > BLOCK_SIZE) {
std::vector<mint> g = shift_of_sampling_points(f, BLOCK_SIZE, BLOCK_NUM - BLOCK_SIZE);
std::move(g.begin(), g.end(), std::back_inserter(f));
} else {
f.resize(BLOCK_NUM);
}
for (int i = 0; i < BLOCK_NUM; ++i) {
f[i] *= mint(i + 1) * BLOCK_SIZE;
}
// f[i] = (i*B + 1) * ... * (i*B + B)
f.insert(f.begin(), 1);
for (int i = 1; i <= BLOCK_NUM; ++i) {
f[i] *= f[i - 1];
}
return f;
}();
public:
PrecalcFactorial() = delete;
// (k * BLOCK_SIZE)!
static mint kth_block_fact(int k) {
return k <= BLOCK_NUM ? _fact[k] : 0;
}
};
std::vector<mint> solve(const std::vector<int>& ns) {
constexpr int block_size = PrecalcFactorial::BLOCK_SIZE;
const int query_num = ns.size();
int leaf_num = block_size - 1;
std::vector<std::vector<std::pair<int, int>>> qs(block_size);
for (int i = 0; i < query_num; ++i) {
if (ns[i] < mint::mod()) {
const int q = ns[i] / block_size, r = ns[i] % block_size;
// (q * B, r)
if (r) {
qs[r].emplace_back(q * block_size, i);
++leaf_num;
}
}
}
int m = 1;
while (m < leaf_num) m <<= 1;
std::vector<int> qid(query_num, -1);
std::vector<polynomial> polys(2 * m), points(2 * m);
{
int i = 0;
for (int r = 1; r < block_size; ++r) {
polys[m + i] = polynomial{ r, 1 };
points[m + i] = polynomial{ 1 };
++i;
for (const auto& [x, qi] : qs[r]) {
qid[qi] = m + i;
polys[m + i] = polynomial{ 1 };
points[m + i] = polynomial{ -x, 1 };
++i;
}
}
for (; i < m; ++i) {
polys[m + i] = polynomial{ 1 };
points[m + i] = polynomial{ 1 };
}
}
for (int i = m - 1; i > 1; --i) {
const int l = 2 * i, r = 2 * i + 1;
points[i] = points[l] * points[r];
polys[i] = polys[l] * polys[r];
}
// reuse space
std::vector<polynomial>& left_polys_mod_points = points;
left_polys_mod_points[1] = polynomial{ 1 };
for (int i = 1; i < m; ++i) {
const int l = 2 * i, r = 2 * i + 1;
left_polys_mod_points[l] = left_polys_mod_points[i] % points[l];
left_polys_mod_points[r] = left_polys_mod_points[i] * polys[l] % points[r];
}
std::vector<mint> ans(query_num);
for (int i = 0; i < query_num; ++i) if (ns[i] < mint::mod()) {
const int q = ns[i] / block_size, r = ns[i] % block_size;
ans[i] = PrecalcFactorial::kth_block_fact(q);
if (r) {
const auto& rem = left_polys_mod_points[qid[i]];
ans[i] *= rem.empty() ? 0 : rem.front();
}
}
return ans;
}
int main() {
const int t = io::scan_int();
std::vector<int> ns(t);
for (auto&& n : ns) {
n = io::scan_int();
}
const std::vector<mint> ans = solve(ns);
for (const auto& v : ans) {
io::print_int(v.val());
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment