Skip to content

Instantly share code, notes, and snippets.

@tthsqe12
Last active August 17, 2020 09:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tthsqe12/b808ec0e31cf62aaa478d341910ebd10 to your computer and use it in GitHub Desktop.
Save tthsqe12/b808ec0e31cf62aaa478d341910ebd10 to your computer and use it in GitHub Desktop.
#include "n_poly.h"
#include "nmod_vec.h"
#define MAC3(h, m, l, a, b) \
{ \
mp_limb_t p1, p0; \
umul_ppmm(p1, p0, a, b); \
add_sssaaaaaa(h, m, l, h, m, l, 0, p1, p0); \
}
#define MAC2(h, l, a, b) \
{ \
mp_limb_t p1, p0; \
umul_ppmm(p1, p0, a, b); \
add_ssaaaa(h, l, h, l, p1, p0); \
}
void n_fq_print_pretty(
const mp_limb_t * a,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i;
int first;
first = 1;
for (i = d - 1; i >= 0; i--)
{
if (a[i] == 0)
continue;
if (!first)
flint_printf("+");
first = 0;
flint_printf("%wu", a[i]);
if (i > 0)
{
flint_printf("*%s", ctx->var);
if (i > 1)
flint_printf("^%wd", i);
}
}
if (first)
flint_printf("0");
}
void n_fq_get_fq_nmod(
fq_nmod_t a,
const mp_limb_t * b,
const fq_nmod_ctx_t ctx)
{
slong i;
slong d = fq_nmod_ctx_degree(ctx);
nmod_poly_fit_length(a, d);
for (i = 0; i < d; i++)
a->coeffs[i] = b[i];
a->length = d;
_nmod_poly_normalise(a);
}
void n_fq_set_fq_nmod(
mp_limb_t * a,
const fq_nmod_t b,
const fq_nmod_ctx_t ctx)
{
slong i;
slong d = fq_nmod_ctx_degree(ctx);
FLINT_ASSERT(b->length <= d);
for (i = 0; i < d; i++)
a[i] = i < b->length ? b->coeffs[i] : 0;
}
void n_fq_add_si(
mp_limb_t * a,
const mp_limb_t * b,
slong c,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
if (a != b)
_nmod_vec_set(a, b, d);
if (c < 0)
{
ulong cc = -c;
if (cc >= ctx->mod.n)
NMOD_RED(cc, cc, ctx->mod);
a[0] = nmod_sub(a[0], cc, ctx->mod);
}
else
{
ulong cc = c;
if (cc >= ctx->mod.n)
NMOD_RED(cc, cc, ctx->mod);
a[0] = nmod_add(a[0], cc, ctx->mod);
}
}
void _n_fq_reduce(
mp_limb_t * a,
mp_limb_t * b,
slong blen,
const fq_nmod_ctx_t ctx,
mp_limb_t * t) /* length 2d */
{
slong i, j, k, deg = ctx->modulus->length - 1;
slong d = ctx->j[ctx->len - 1];
FLINT_ASSERT(a != b);
FLINT_ASSERT(0 <= blen && blen <= 2*d - 1);
FLINT_ASSERT(blen == 0 || b[blen - 1] != 0);
if (blen <= deg)
{
for (i = 0; i < blen; i++)
a[i] = b[i];
for (i = blen; i < deg; i++)
a[i] = 0;
}
else if (ctx->sparse_modulus)
{
nmod_t mod = ctx->mod;
for (k = ctx->len - 2; k >= 0; k--)
t[k] = mod.n - ctx->a[k];
for (i = blen - 1; i >= d; i--)
{
for (k = ctx->len - 2; k >= 0; k--)
NMOD_ADDMUL(b[ctx->j[k] + i - d], b[i], t[k], mod);
b[i] = 0;
}
for (i = 0; i < deg; i++)
a[i] = b[i];
}
else
{
/*
_nmod_poly_divrem_newton_n_preinv(t, a, b, blen,
ctx->modulus->coeffs, ctx->modulus->length,
ctx->inv->coeffs, ctx->inv->length,
ctx->mod);
*/
mp_limb_t * Q = t;
mp_limb_t * R = a;
const mp_limb_t * A = b;
slong lenA = blen;
const mp_limb_t * B = ctx->modulus->coeffs;
slong lenB = deg + 1;
const mp_limb_t * Binv = ctx->inv->coeffs;
slong lenBinv = ctx->inv->length;
const slong lenQ = lenA - lenB + 1;
FLINT_ASSERT(lenBinv > 0);
FLINT_ASSERT(lenQ > 0);
if (lenQ <= 2)
{
if (lenQ == 2)
_nmod_poly_divrem_q1(Q, R, A, lenA, B, lenB, ctx->mod);
else
_nmod_poly_divrem_q0(Q, R, A, B, lenB, ctx->mod);
return;
}
if (deg < 20)
{
for (i = 0; i < lenQ; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
j = FLINT_MAX(0, i - lenBinv + 1);
umul_ppmm(t1, t0, A[lenA - 1 - j], Binv[i - j]);
for (j++; j <= i; j++)
MAC3(t2, t1, t0, A[lenA - 1 - j], Binv[i - j]);
NMOD_RED3(Q[lenQ - 1 - i], t2, t1, t0, ctx->mod);
}
for (i = 0; i < deg; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
for (j = FLINT_MAX(0, i - lenQ + 1); j <= i; j++)
MAC3(t2, t1, t0, B[j], Q[i - j]);
NMOD_RED3(t0, t2, t1, t0, ctx->mod);
R[i] = nmod_sub(A[i], t0, ctx->mod);
}
}
else
{
mp_ptr Arev = t + d;
_nmod_poly_reverse(Arev, A + (lenA - lenQ), lenQ, lenQ);
_nmod_poly_mullow(Q, Arev, lenQ, Binv, FLINT_MIN(lenQ, lenBinv), lenQ, ctx->mod);
_nmod_poly_reverse(Q, Q, lenQ, lenQ);
FLINT_ASSERT(lenB > 1);
FLINT_ASSERT(lenQ < lenB - 1);
_nmod_poly_mullow(R, B, lenB - 1, Q, lenQ, lenB - 1, ctx->mod);
_nmod_vec_sub(R, A, R, lenB - 1, ctx->mod);
}
}
}
void _n_fq_madd2(
mp_limb_t * a, /* length 2d */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
const fq_nmod_ctx_t ctx,
mp_limb_t * t) /* length 2d */
{
slong d = ctx->modulus->length - 1;
FLINT_ASSERT(d > 0);
if (d < 30)
{
slong i, j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
mp_limb_t s2 = 0, s1 = 0, s0 = 0;
umul_ppmm(t1, t0, b[i], c[0]);
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
add_ssaaaa(t1, t0, t1, t0, 0, a[i]);
add_ssaaaa(s1, s0, s1, s0, 0, a[2*d - 2 - i]);
for (j = 1; j <= i; j++)
{
MAC3(t2, t1, t0, b[i - j], c[0 + j]);
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
NMOD_RED3(a[i], t2, t1, t0, ctx->mod);
NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod);
}
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
umul_ppmm(t1, t0, b[d - 1], c[0]);
add_ssaaaa(t1, t0, t1, t0, 0, a[d - 1]);
for (j = 1; j < d; j++)
{
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
}
NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod);
}
}
else
{
_nmod_poly_mul(t, b, d, c, d, ctx->mod);
_nmod_vec_add(a, a, t, 2*d - 1, ctx->mod);
}
}
void _n_fq_mul2(
mp_limb_t * a, /* length 2d */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
FLINT_ASSERT(d > 0);
if (d < 30)
{
slong i, j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
mp_limb_t s2 = 0, s1 = 0, s0 = 0;
umul_ppmm(t1, t0, b[i], c[0]);
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
for (j = 1; j <= i; j++)
{
MAC3(t2, t1, t0, b[i - j], c[0 + j]);
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
NMOD_RED3(a[i], t2, t1, t0, ctx->mod);
NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod);
}
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
umul_ppmm(t1, t0, b[d - 1], c[0]);
for (j = 1; j < d; j++)
{
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
}
NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod);
}
}
else
{
_nmod_poly_mul(a, b, d, c, d, ctx->mod);
}
}
/**************************** lazy *******************************************/
int _n_fq_dot_lazy_size(
slong len,
const fq_nmod_ctx_t ctx)
{
ulong t[4];
slong d = fq_nmod_ctx_degree(ctx);
mp_limb_t p = ctx->mod.n;
if (d > 30 || p < 2 || len < 0)
return 0;
umul_ppmm(t[1], t[0], p - 1, p - 1);
t[2] = mpn_mul_1(t, t, 2, d);
t[3] = mpn_mul_1(t, t, 3, len);
if (t[3] != 0)
return 0;
if (t[2] != 0)
return 3;
if (t[1] != 0)
return 2;
return 1;
}
void _n_fq_reduce2_lazy1(
mp_limb_t * a, /* length 6d, 2d used */
slong d,
nmod_t ctx)
{
slong i;
for (i = 0; i < 2*d - 1; i++)
NMOD_RED(a[i], a[i], ctx);
}
void _n_fq_madd2_lazy1(
mp_limb_t * a, /* length 6d, 2d used */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i, j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t0 = 0;
mp_limb_t s0 = 0;
t0 = a[i + 0];
s0 = a[(2*d - 2 - i) + 0];
t0 += b[i]*c[0];
s0 += b[d - 1]*c[d - 1 - i];
for (j = 1; j <= i; j++)
{
t0 += b[i - j]*c[0 + j];
s0 += b[d - 1 - j]*c[d - 1 - i + j];
}
a[i + 0] = t0;
a[(2*d - 2 - i) + 0] = s0;
}
{
mp_limb_t t0 = 0;
t0 = a[(d - 1) + 0];
t0 += b[d - 1]*c[0];
for (j = 1; j < d; j++)
{
t0 += b[d - 1 - j]*c[0 + j];
}
a[(d - 1) + 0] = t0;
}
}
void _n_fq_mul2_lazy1(
mp_limb_t * a, /* length 6d, 2d used */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i,j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t0 = 0;
mp_limb_t s0 = 0;
t0 = b[i]*c[0];
s0 = b[d - 1]*c[d - 1 - i];
for (j = 1; j <= i; j++)
{
t0 += b[i - j]*c[0 + j];
s0 += b[d - 1 - j]*c[d - 1 - i + j];
}
a[i + 0] = t0;
a[(2*d - 2 - i) + 0] = s0;
}
{
mp_limb_t t0 = 0;
t0 = b[d - 1]*c[0];
for (j = 1; j < d; j++)
{
t0 += b[d - 1 - j]*c[0 + j];
}
a[(d - 1) + 0] = t0;
}
}
void _n_fq_reduce2_lazy2(
mp_limb_t * a, /* length 6d, 4d used */
slong d,
nmod_t ctx)
{
slong i;
for (i = 0; i < 2*d - 1; i++)
NMOD2_RED2(a[i], a[2*i + 1], a[2*i + 0], ctx);
}
void _n_fq_madd2_lazy2(
mp_limb_t * a, /* length 6d, 4d used */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i,j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t1 = 0, t0 = 0;
mp_limb_t s1 = 0, s0 = 0;
t0 = a[2*i + 0];
t1 = a[2*i + 1];
s0 = a[2*(2*d - 2 - i) + 0];
s1 = a[2*(2*d - 2 - i) + 1];
MAC2(t1, t0, b[i], c[0]);
MAC2(s1, s0, b[d - 1], c[d - 1 - i]);
for (j = 1; j <= i; j++)
{
MAC2(t1, t0, b[i - j], c[0 + j]);
MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
a[2*i + 0] = t0;
a[2*i + 1] = t1;
a[2*(2*d - 2 - i) + 0] = s0;
a[2*(2*d - 2 - i) + 1] = s1;
}
{
mp_limb_t t1 = 0, t0 = 0;
t0 = a[2*(d - 1) + 0];
t1 = a[2*(d - 1) + 1];
MAC2(t1, t0, b[d - 1], c[0]);
for (j = 1; j < d; j++)
{
MAC2(t1, t0, b[d - 1 - j], c[0 + j]);
}
a[2*(d - 1) + 0] = t0;
a[2*(d - 1) + 1] = t1;
}
}
void _n_fq_mul2_lazy2(
mp_limb_t * a, /* length 6d */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i,j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t1 = 0, t0 = 0;
mp_limb_t s1 = 0, s0 = 0;
umul_ppmm(t1, t0, b[i], c[0]);
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
for (j = 1; j <= i; j++)
{
MAC2(t1, t0, b[i - j], c[0 + j]);
MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
a[2*i + 0] = t0;
a[2*i + 1] = t1;
a[2*(2*d - 2 - i) + 0] = s0;
a[2*(2*d - 2 - i) + 1] = s1;
}
{
mp_limb_t t1 = 0, t0 = 0;
umul_ppmm(t1, t0, b[d - 1], c[0]);
for (j = 1; j < d; j++)
{
MAC2(t1, t0, b[d - 1 - j], c[0 + j]);
}
a[2*(d - 1) + 0] = t0;
a[2*(d - 1) + 1] = t1;
}
}
void _n_fq_reduce2_lazy3(
mp_limb_t * a, /* length 6d */
slong d,
nmod_t ctx)
{
slong i;
for (i = 0; i < 2*d - 1; i++)
NMOD_RED3(a[i], a[3*i + 2], a[3*i + 1], a[3*i + 0], ctx);
}
void _n_fq_madd2_lazy3(
mp_limb_t * a, /* length 6d */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i,j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
mp_limb_t s2 = 0, s1 = 0, s0 = 0;
t0 = a[3*i + 0];
t1 = a[3*i + 1];
t2 = a[3*i + 2];
s0 = a[3*(2*d - 2 - i) + 0];
s1 = a[3*(2*d - 2 - i) + 1];
s2 = a[3*(2*d - 2 - i) + 2];
MAC3(t2, t1, t0, b[i], c[0]);
MAC3(s2, s1, s0, b[d - 1], c[d - 1 - i]);
for (j = 1; j <= i; j++)
{
MAC3(t2, t1, t0, b[i - j], c[0 + j]);
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
a[3*i + 0] = t0;
a[3*i + 1] = t1;
a[3*i + 2] = t2;
a[3*(2*d - 2 - i) + 0] = s0;
a[3*(2*d - 2 - i) + 1] = s1;
a[3*(2*d - 2 - i) + 2] = s2;
}
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
t0 = a[3*(d - 1) + 0];
t1 = a[3*(d - 1) + 1];
t2 = a[3*(d - 1) + 2];
MAC3(t2, t1, t0, b[d - 1], c[0]);
for (j = 1; j < d; j++)
{
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
}
a[3*(d - 1) + 0] = t0;
a[3*(d - 1) + 1] = t1;
a[3*(d - 1) + 2] = t2;
}
}
void _n_fq_mul2_lazy3(
mp_limb_t * a, /* length 6d */
const mp_limb_t * b, /* length d */
const mp_limb_t * c, /* length d */
slong d)
{
slong i,j;
for (i = 0; i + 1 < d; i++)
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
mp_limb_t s2 = 0, s1 = 0, s0 = 0;
umul_ppmm(t1, t0, b[i], c[0]);
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
for (j = 1; j <= i; j++)
{
MAC3(t2, t1, t0, b[i - j], c[0 + j]);
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
}
a[3*i + 0] = t0;
a[3*i + 1] = t1;
a[3*i + 2] = t2;
a[3*(2*d - 2 - i) + 0] = s0;
a[3*(2*d - 2 - i) + 1] = s1;
a[3*(2*d - 2 - i) + 2] = s2;
}
{
mp_limb_t t2 = 0, t1 = 0, t0 = 0;
umul_ppmm(t1, t0, b[d - 1], c[0]);
for (j = 1; j < d; j++)
{
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
}
a[3*(d - 1) + 0] = t0;
a[3*(d - 1) + 1] = t1;
a[3*(d - 1) + 2] = t2;
}
}
void _n_fq_inv(
mp_limb_t * a,
const mp_limb_t * b,
const fq_nmod_ctx_t ctx,
mp_limb_t * t) /* length d */
{
slong d = ctx->modulus->length - 1;
slong blen = d;
FLINT_ASSERT(d > 0);
while (blen > 0 && b[blen - 1] == 0)
blen--;
if (blen < 1)
{
flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv");
}
else if (blen == 1)
{
a[0] = n_invmod(b[0], ctx->mod.n);
_nmod_vec_zero(a + 1, d - 1);
}
else
{
if (1 != _nmod_poly_gcdinv(t, a, b, blen, ctx->modulus->coeffs, d + 1, ctx->mod))
{
flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv");
}
if (t[0] != 1)
{
_nmod_vec_scalar_mul_nmod(a, a, d, n_invmod(t[0], ctx->mod.n), ctx->mod);
}
}
}
void n_fq_mul(
mp_limb_t * a,
const mp_limb_t * b,
const mp_limb_t * c,
const fq_nmod_ctx_t ctx)
{
fq_nmod_t A, B, C;
fq_nmod_init(A, ctx);
fq_nmod_init(B, ctx);
fq_nmod_init(C, ctx);
n_fq_get_fq_nmod(B, b, ctx);
n_fq_get_fq_nmod(C, c, ctx);
fq_nmod_mul(A, B, C, ctx);
n_fq_set_fq_nmod(a, A, ctx);
fq_nmod_clear(A, ctx);
fq_nmod_clear(B, ctx);
fq_nmod_clear(C, ctx);
}
void n_fq_inv(
mp_limb_t * a,
const mp_limb_t * b,
const fq_nmod_ctx_t ctx)
{
fq_nmod_t A, B;
fq_nmod_init(A, ctx);
fq_nmod_init(B, ctx);
n_fq_get_fq_nmod(B, b, ctx);
fq_nmod_inv(A, B, ctx);
n_fq_set_fq_nmod(a, A, ctx);
fq_nmod_clear(A, ctx);
fq_nmod_clear(B, ctx);
}
void _n_fq_pow_ui(
mp_limb_t * a,
const mp_limb_t * b,
ulong e,
const fq_nmod_ctx_t ctx)
{
fq_nmod_t A, B;
fq_nmod_init(A, ctx);
fq_nmod_init(B, ctx);
n_fq_get_fq_nmod(B, b, ctx);
fq_nmod_pow_ui(A, B, e, ctx);
n_fq_set_fq_nmod(a, A, ctx);
fq_nmod_clear(A, ctx);
fq_nmod_clear(B, ctx);
}
void n_fq_pow_ui(
mp_limb_t * a,
const mp_limb_t * b,
ulong e,
const fq_nmod_ctx_t ctx)
{
fq_nmod_t A, B;
fq_nmod_init(A, ctx);
fq_nmod_init(B, ctx);
n_fq_get_fq_nmod(B, b, ctx);
fq_nmod_pow_ui(A, B, e, ctx);
n_fq_set_fq_nmod(a, A, ctx);
fq_nmod_clear(A, ctx);
fq_nmod_clear(B, ctx);
}
int n_fq_is_canonical(
const mp_limb_t * a,
const fq_nmod_ctx_t ctx)
{
slong i, d = fq_nmod_ctx_degree(ctx);
for (i = 0; i < d; i++)
{
if (a[i] >= ctx->mod.n)
return 0;
}
return 1;
}
#include "fq_nmod_mpoly.h"
#include "n_poly.h"
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0600
#include <conio.h>
#include <windows.h>
typedef struct {
mp_limb_t * coeffs;
ulong * exps;
slong length;
flint_bitcnt_t bits; /* number of bits per exponent */
slong coeffs_alloc; /* abs size in mp_limb_t units */
slong exps_alloc; /* abs size in ulong units */
} new_fq_nmod_mpoly_struct;
typedef new_fq_nmod_mpoly_struct new_fq_nmod_mpoly_t[1];
void new_fq_nmod_mpoly_init(
new_fq_nmod_mpoly_t A,
const fq_nmod_mpoly_ctx_t ctx)
{
A->coeffs = NULL;
A->exps = NULL;
A->length = 0;
A->bits = MPOLY_MIN_BITS;
A->coeffs_alloc = 0;
A->exps_alloc = 0;
}
void new_fq_nmod_mpoly_clear(
new_fq_nmod_mpoly_t A,
const fq_nmod_mpoly_ctx_t ctx)
{
if (A->coeffs_alloc > 0)
flint_free(A->coeffs);
if (A->exps_alloc > 0)
flint_free(A->exps);
}
void new_fq_nmod_mpoly_swap(
new_fq_nmod_mpoly_t A,
new_fq_nmod_mpoly_t B,
const fq_nmod_mpoly_ctx_t ctx)
{
new_fq_nmod_mpoly_struct t = *A;
*A = *B;
*B = t;
}
int n_fq_fprint_pretty(
FILE * file,
const mp_limb_t * a,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i;
int first;
first = 1;
for (i = d - 1; i >= 0; i--)
{
if (a[i] == 0)
continue;
if (!first)
flint_fprintf(file, "+");
first = 0;
flint_fprintf(file, "%wu", a[i]);
if (i > 0)
{
flint_fprintf(file, "*%s", ctx->var);
if (i > 1)
flint_fprintf(file, "^%wd", i);
}
}
if (first)
flint_fprintf(file, "0");
return 1;
}
int new_fq_nmod_mpoly_fprint_pretty(
FILE * file,
const new_fq_nmod_mpoly_t A,
const char ** x_in,
const fq_nmod_mpoly_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx->fqctx);
slong len = A->length;
ulong * exp = A->exps;
slong bits = A->bits;
slong i, j, N;
fmpz * exponents;
int r = 0;
char ** x = (char **) x_in;
TMP_INIT;
if (len == 0)
{
r = (EOF != fputc('0', file));
return r;
}
#define CHECK_r if (r <= 0) goto done;
N = mpoly_words_per_exp(bits, ctx->minfo);
TMP_START;
if (x == NULL)
{
x = (char **) TMP_ALLOC(ctx->minfo->nvars*sizeof(char *));
for (i = 0; i < ctx->minfo->nvars; i++)
{
x[i] = (char *) TMP_ALLOC(((FLINT_BITS+4)/3)*sizeof(char));
flint_sprintf(x[i], "x%wd", i+1);
}
}
exponents = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(ulong));
for (i = 0; i < ctx->minfo->nvars; i++)
fmpz_init(exponents + i);
for (i = 0; i < len; i++)
{
if (i > 0)
{
r = flint_fprintf(file, " + ");
CHECK_r
}
r = flint_fprintf(file, "(");
CHECK_r
r = n_fq_fprint_pretty(file, A->coeffs + d*i, ctx->fqctx);
CHECK_r
r = flint_fprintf(file, ")");
CHECK_r
mpoly_get_monomial_ffmpz(exponents, exp + N*i, bits, ctx->minfo);
for (j = 0; j < ctx->minfo->nvars; j++)
{
int cmp = fmpz_cmp_ui(exponents + j, 1);
if (cmp > 0)
{
r = flint_fprintf(file, "*%s^", x[j]);
CHECK_r
r = fmpz_fprint(file, exponents + j);
CHECK_r
}
else if (cmp == 0)
{
r = flint_fprintf(file, "*%s", x[j]);
CHECK_r
}
}
}
done:
for (i = 0; i < ctx->minfo->nvars; i++)
fmpz_clear(exponents + i);
TMP_END;
return r;
}
void new_fq_nmod_mpoly_print_pretty(
const new_fq_nmod_mpoly_t A,
const char ** x,
const fq_nmod_mpoly_ctx_t ctx)
{
new_fq_nmod_mpoly_fprint_pretty(stdout, A, x, ctx);
fflush(stdout);
}
void _new_fq_nmod_mpoly_fit_length(
mp_limb_t ** coeffs,
slong * coeffs_alloc,
slong d,
ulong ** exps,
slong * exps_alloc,
slong N,
slong length)
{
if (d*length > *coeffs_alloc)
{
*coeffs_alloc = FLINT_MAX(d*length, *coeffs_alloc*2);
*coeffs = flint_realloc(*coeffs, *coeffs_alloc*sizeof(mp_limb_t));
}
if (N*length > *exps_alloc)
{
*exps_alloc = FLINT_MAX(N*length, *exps_alloc*2);
*exps = flint_realloc(*exps, *exps_alloc*sizeof(ulong));
}
}
void new_fq_nmod_mpoly_fit_length_set_bits(
new_fq_nmod_mpoly_t A,
slong length,
slong bits,
const fq_nmod_mpoly_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx->fqctx);
slong N = mpoly_words_per_exp(bits, ctx->minfo);
_new_fq_nmod_mpoly_fit_length(&A->coeffs, &A->coeffs_alloc, d,
&A->exps, &A->exps_alloc, N, length);
A->bits = bits;
}
void new_fq_nmod_mpoly_zero(
new_fq_nmod_mpoly_t A,
const fq_nmod_mpoly_ctx_t ctx)
{
A->length = 0;
}
void new_fq_nmod_mpoly_set_fq_nmod_mpoly(
new_fq_nmod_mpoly_t A,
const fq_nmod_mpoly_t B,
const fq_nmod_mpoly_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx->fqctx);
slong N = mpoly_words_per_exp(B->bits, ctx->minfo);
slong i;
new_fq_nmod_mpoly_fit_length_set_bits(A, B->length, B->bits, ctx);
mpoly_copy_monomials(A->exps, B->exps, B->length, N);
for (i = 0; i < B->length; i++)
n_fq_set_fq_nmod(A->coeffs + d*i, B->coeffs + i, ctx->fqctx);
A->length = B->length;
}
void new_fq_nmod_mpoly_get_fq_nmod_mpoly(
fq_nmod_mpoly_t A,
const new_fq_nmod_mpoly_t B,
const fq_nmod_mpoly_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx->fqctx);
slong N = mpoly_words_per_exp(B->bits, ctx->minfo);
slong i;
fq_nmod_mpoly_fit_bits(A, B->bits, ctx);
A->bits = B->bits;
fq_nmod_mpoly_fit_length(A, B->length, ctx);
mpoly_copy_monomials(A->exps, B->exps, B->length, N);
for (i = 0; i < B->length; i++)
n_fq_get_fq_nmod(A->coeffs + i, B->coeffs + d*i, ctx->fqctx);
A->length = B->length;
}
int new_fq_nmod_mpoly_equal(
const new_fq_nmod_mpoly_t A,
const new_fq_nmod_mpoly_t B,
const fq_nmod_mpoly_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx->fqctx);
if (A == B)
return 1;
if (A->length != B->length)
return 0;
if (!_nmod_vec_equal(A->coeffs, B->coeffs, d*B->length))
return 0;
return !mpoly_monomials_cmp(A->exps, A->bits, B->exps, B->bits,
B->length, ctx->minfo);
}
void _new_fq_nmod_mpoly_mul_johnson1(
new_fq_nmod_mpoly_t A,
const mp_limb_t * Bcoeffs,
const ulong * Bexps,
slong Blen,
const mp_limb_t * Ccoeffs,
const ulong * Cexps,
slong Clen,
ulong maskhi,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i, j;
slong next_loc;
slong heap_len = 2; /* heap zero index unused */
mpoly_heap1_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
slong * hind;
ulong exp;
mp_limb_t * t;
int lazy_size = _n_fq_dot_lazy_size(Blen, ctx);
mp_limb_t * Acoeffs = A->coeffs;
ulong * Aexps = A->exps;
slong Acoeffs_alloc = A->coeffs_alloc;
slong Aexps_alloc = A->exps_alloc;
slong Alen;
TMP_INIT;
TMP_START;
next_loc = Blen + 4; /* something bigger than heap can ever be */
heap = (mpoly_heap1_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap1_s));
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong));
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong));
t = (mp_limb_t *) TMP_ALLOC(6*d*sizeof(mp_limb_t));
for (i = 0; i < Blen; i++)
hind[i] = 1;
/* put (0, 0, exp2[0] + exp3[0]) on heap */
x = chain + 0;
x->i = 0;
x->j = 0;
x->next = NULL;
HEAP_ASSIGN(heap[1], Bexps[0] + Cexps[0], x);
hind[0] = 2*1 + 0;
Alen = 0;
while (heap_len > 1)
{
exp = heap[1].exp;
_new_fq_nmod_mpoly_fit_length(&Acoeffs, &Acoeffs_alloc, d,
&Aexps, &Aexps_alloc, 1, Alen + 1);
Aexps[Alen] = exp;
_nmod_vec_zero(t, 6*d);
switch (lazy_size)
{
case 1:
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy1(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
_n_fq_reduce2_lazy1(t, d, ctx->mod);
break;
case 2:
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy2(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
_n_fq_reduce2_lazy2(t, d, ctx->mod);
break;
case 3:
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy3(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
_n_fq_reduce2_lazy3(t, d, ctx->mod);
break;
default:
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2(t, Bcoeffs + d*x->i,
Ccoeffs + d*x->j, ctx, t + 2*d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
break;
}
_n_fq_reduce2(Acoeffs + d*Alen, t, ctx, t + 2*d);
Alen += !_n_fq_is_zero(Acoeffs + d*Alen, d);
while (store > store_base)
{
j = *--store;
i = *--store;
/* should we go right? */
if ((i + 1 < Blen) &&
(hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexps[x->i] + Cexps[x->j], x,
&next_loc, &heap_len, maskhi);
}
/* should we go up? */
if ((j + 1 < Clen) &&
((hind[i] & 1) == 1) &&
((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexps[x->i] + Cexps[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
}
A->coeffs = Acoeffs;
A->exps = Aexps;
A->coeffs_alloc = Acoeffs_alloc;
A->exps_alloc = Aexps_alloc;
A->length = Alen;
TMP_END;
}
void _new_fq_nmod_mpoly_mul_johnson(
new_fq_nmod_mpoly_t A,
const mp_limb_t * Bcoeffs,
const ulong * Bexps,
slong Blen,
const mp_limb_t * Ccoeffs,
const ulong * Cexps,
slong Clen,
flint_bitcnt_t bits,
slong N,
const ulong * cmpmask,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i, j;
slong next_loc;
slong heap_len = 2; /* heap zero index unused */
mpoly_heap_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
ulong * exp, * exps;
ulong ** exp_list;
slong exp_next;
slong * hind;
mp_limb_t * t;
int lazy_size = _n_fq_dot_lazy_size(Blen, ctx);
mp_limb_t * Acoeffs = A->coeffs;
ulong * Aexps = A->exps;
slong Acoeffs_alloc = A->coeffs_alloc;
slong Aexps_alloc = A->exps_alloc;
slong Alen;
TMP_INIT;
FLINT_ASSERT(Blen > 0);
FLINT_ASSERT(Clen > 0);
FLINT_ASSERT(A->bits == bits);
if (N == 1)
{
_new_fq_nmod_mpoly_mul_johnson1(A, Bcoeffs, Bexps, Blen,
Ccoeffs, Cexps, Clen, cmpmask[0], ctx);
return;
}
TMP_START;
next_loc = Blen + 4; /* something bigger than heap can ever be */
heap = (mpoly_heap_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap_s));
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong));
exps = (ulong *) TMP_ALLOC(Blen*N*sizeof(ulong));
exp_list = (ulong **) TMP_ALLOC(Blen*sizeof(ulong *));
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong));
t = (mp_limb_t *) TMP_ALLOC(6*d*sizeof(mp_limb_t));
for (i = 0; i < Blen; i++)
{
exp_list[i] = exps + i*N;
hind[i] = 1;
}
/* start with no heap nodes and no exponent vectors in use */
exp_next = 0;
/* put (0, 0, exp2[0] + exp3[0]) on heap */
x = chain + 0;
x->i = 0;
x->j = 0;
x->next = NULL;
heap[1].next = x;
heap[1].exp = exp_list[exp_next++];
if (bits <= FLINT_BITS)
mpoly_monomial_add(heap[1].exp, Bexps + N*0, Cexps + N*0, N);
else
mpoly_monomial_add_mp(heap[1].exp, Bexps + N*0, Cexps + N*0, N);
hind[0] = 2*1 + 0;
Alen = 0;
while (heap_len > 1)
{
exp = heap[1].exp;
_new_fq_nmod_mpoly_fit_length(&Acoeffs, &Acoeffs_alloc, d,
&Aexps, &Aexps_alloc, N, Alen + 1);
mpoly_monomial_set(Aexps + N*Alen, exp, N);
_nmod_vec_zero(t, 6*d);
switch (lazy_size)
{
case 1:
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy1(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
_n_fq_reduce2_lazy1(t, d, ctx->mod);
break;
case 2:
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy2(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
_n_fq_reduce2_lazy2(t, d, ctx->mod);
break;
case 3:
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2_lazy3(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
_n_fq_reduce2_lazy3(t, d, ctx->mod);
break;
default:
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
_n_fq_madd2(t, Bcoeffs + d*x->i,
Ccoeffs + d*x->j, ctx, t + 2*d);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
break;
}
_n_fq_reduce2(Acoeffs + d*Alen, t, ctx, t + 2*d);
Alen += !_n_fq_is_zero(Acoeffs + d*Alen, d);
while (store > store_base)
{
j = *--store;
i = *--store;
/* should we go right? */
if ((i + 1 < Blen) &&
(hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Bexps + x->i*N,
Cexps + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N,
Cexps + x->j*N, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
/* should we go up? */
if ((j + 1 < Clen) &&
((hind[i] & 1) == 1) &&
((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Bexps + x->i*N,
Cexps + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N,
Cexps + x->j*N, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
}
}
A->coeffs = Acoeffs;
A->exps = Aexps;
A->coeffs_alloc = Acoeffs_alloc;
A->exps_alloc = Aexps_alloc;
A->length = Alen;
TMP_END;
}
void new_fq_nmod_mpoly_mul_johnson(
new_fq_nmod_mpoly_t A,
const new_fq_nmod_mpoly_t B,
const new_fq_nmod_mpoly_t C,
const fq_nmod_mpoly_ctx_t ctx)
{
slong i, N, Alen;
flint_bitcnt_t Abits;
fmpz * Bmaxfields, * Cmaxfields;
ulong * cmpmask;
ulong * Bexps = B->exps, * Cexps = C->exps;
int freeBexps = 0, freeCexps = 0;
new_fq_nmod_mpoly_struct * P, T[1];
TMP_INIT;
if (B->length < 1 || C->length < 1)
{
new_fq_nmod_mpoly_zero(A, ctx);
return;
}
TMP_START;
Bmaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
Cmaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
for (i = 0; i < ctx->minfo->nfields; i++)
{
fmpz_init(Bmaxfields + i);
fmpz_init(Cmaxfields + i);
}
mpoly_max_fields_fmpz(Bmaxfields, Bexps, B->length, B->bits, ctx->minfo);
mpoly_max_fields_fmpz(Cmaxfields, Cexps, C->length, C->bits, ctx->minfo);
_fmpz_vec_add(Bmaxfields, Bmaxfields, Cmaxfields, ctx->minfo->nfields);
Abits = _fmpz_vec_max_bits(Bmaxfields, ctx->minfo->nfields);
Abits = FLINT_MAX(MPOLY_MIN_BITS, Abits + 1);
Abits = FLINT_MAX(Abits, B->bits);
Abits = FLINT_MAX(Abits, C->bits);
Abits = mpoly_fix_bits(Abits, ctx->minfo);
for (i = 0; i < ctx->minfo->nfields; i++)
{
fmpz_clear(Bmaxfields + i);
fmpz_clear(Cmaxfields + i);
}
N = mpoly_words_per_exp(Abits, ctx->minfo);
cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo);
/* ensure input exponents are packed into same sized fields as output */
if (Abits != B->bits)
{
freeBexps = 1;
Bexps = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
mpoly_repack_monomials(Bexps, Abits, B->exps, B->bits, B->length, ctx->minfo);
}
if (Abits != C->bits)
{
freeCexps = 1;
Cexps = (ulong *) flint_malloc(N*C->length*sizeof(ulong));
mpoly_repack_monomials(Cexps, Abits, C->exps, C->bits, C->length, ctx->minfo);
}
if (A == B || A == C)
{
new_fq_nmod_mpoly_init(T, ctx);
P = T;
}
else
{
P = A;
}
new_fq_nmod_mpoly_fit_length_set_bits(P, B->length + C->length, Abits, ctx);
if (B->length > C->length)
{
_new_fq_nmod_mpoly_mul_johnson(P, C->coeffs, Cexps, C->length,
B->coeffs, Bexps, B->length, Abits, N, cmpmask, ctx->fqctx);
}
else
{
_new_fq_nmod_mpoly_mul_johnson(P, B->coeffs, Bexps, B->length,
C->coeffs, Cexps, C->length, Abits, N, cmpmask, ctx->fqctx);
}
if (A == B || A == C)
{
new_fq_nmod_mpoly_swap(A, T, ctx);
new_fq_nmod_mpoly_clear(T, ctx);
}
if (freeBexps)
flint_free(Bexps);
if (freeCexps)
flint_free(Cexps);
TMP_END;
}
void fq_nmod_mpoly_mul(fq_nmod_mpoly_t A, const fq_nmod_mpoly_t B,
const fq_nmod_mpoly_t C, const fq_nmod_mpoly_ctx_t ctx)
{
new_fq_nmod_mpoly_t a, b, c, d;
ulong t1, t2;
new_fq_nmod_mpoly_init(a, ctx);
new_fq_nmod_mpoly_init(b, ctx);
new_fq_nmod_mpoly_init(c, ctx);
new_fq_nmod_mpoly_init(d, ctx);
new_fq_nmod_mpoly_set_fq_nmod_mpoly(a, A, ctx);
new_fq_nmod_mpoly_set_fq_nmod_mpoly(b, B, ctx);
new_fq_nmod_mpoly_set_fq_nmod_mpoly(c, C, ctx);
t1 = GetTickCount64();
new_fq_nmod_mpoly_mul_johnson(a, b, c, ctx);
t1 = GetTickCount64() - t1;
t2 = GetTickCount64();
fq_nmod_mpoly_mul_johnson(A, B, C, ctx);
t2 = GetTickCount64() - t2;
flint_printf("new: %wu old: %wu ratio: %f\n", t1, t2, (double) t2 / (double) t1);
new_fq_nmod_mpoly_set_fq_nmod_mpoly(d, A, ctx);
if (!new_fq_nmod_mpoly_equal(a, d, ctx))
{
flint_printf("oops\n");
flint_abort();
}
new_fq_nmod_mpoly_clear(a, ctx);
new_fq_nmod_mpoly_clear(b, ctx);
new_fq_nmod_mpoly_clear(c, ctx);
new_fq_nmod_mpoly_clear(d, ctx);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment