Last active
August 17, 2020 09:15
-
-
Save tthsqe12/b808ec0e31cf62aaa478d341910ebd10 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "n_poly.h" | |
#include "nmod_vec.h" | |
#define MAC3(h, m, l, a, b) \ | |
{ \ | |
mp_limb_t p1, p0; \ | |
umul_ppmm(p1, p0, a, b); \ | |
add_sssaaaaaa(h, m, l, h, m, l, 0, p1, p0); \ | |
} | |
#define MAC2(h, l, a, b) \ | |
{ \ | |
mp_limb_t p1, p0; \ | |
umul_ppmm(p1, p0, a, b); \ | |
add_ssaaaa(h, l, h, l, p1, p0); \ | |
} | |
void n_fq_print_pretty( | |
const mp_limb_t * a, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
slong i; | |
int first; | |
first = 1; | |
for (i = d - 1; i >= 0; i--) | |
{ | |
if (a[i] == 0) | |
continue; | |
if (!first) | |
flint_printf("+"); | |
first = 0; | |
flint_printf("%wu", a[i]); | |
if (i > 0) | |
{ | |
flint_printf("*%s", ctx->var); | |
if (i > 1) | |
flint_printf("^%wd", i); | |
} | |
} | |
if (first) | |
flint_printf("0"); | |
} | |
void n_fq_get_fq_nmod( | |
fq_nmod_t a, | |
const mp_limb_t * b, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong i; | |
slong d = fq_nmod_ctx_degree(ctx); | |
nmod_poly_fit_length(a, d); | |
for (i = 0; i < d; i++) | |
a->coeffs[i] = b[i]; | |
a->length = d; | |
_nmod_poly_normalise(a); | |
} | |
void n_fq_set_fq_nmod( | |
mp_limb_t * a, | |
const fq_nmod_t b, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong i; | |
slong d = fq_nmod_ctx_degree(ctx); | |
FLINT_ASSERT(b->length <= d); | |
for (i = 0; i < d; i++) | |
a[i] = i < b->length ? b->coeffs[i] : 0; | |
} | |
void n_fq_add_si( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
slong c, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
if (a != b) | |
_nmod_vec_set(a, b, d); | |
if (c < 0) | |
{ | |
ulong cc = -c; | |
if (cc >= ctx->mod.n) | |
NMOD_RED(cc, cc, ctx->mod); | |
a[0] = nmod_sub(a[0], cc, ctx->mod); | |
} | |
else | |
{ | |
ulong cc = c; | |
if (cc >= ctx->mod.n) | |
NMOD_RED(cc, cc, ctx->mod); | |
a[0] = nmod_add(a[0], cc, ctx->mod); | |
} | |
} | |
void _n_fq_reduce( | |
mp_limb_t * a, | |
mp_limb_t * b, | |
slong blen, | |
const fq_nmod_ctx_t ctx, | |
mp_limb_t * t) /* length 2d */ | |
{ | |
slong i, j, k, deg = ctx->modulus->length - 1; | |
slong d = ctx->j[ctx->len - 1]; | |
FLINT_ASSERT(a != b); | |
FLINT_ASSERT(0 <= blen && blen <= 2*d - 1); | |
FLINT_ASSERT(blen == 0 || b[blen - 1] != 0); | |
if (blen <= deg) | |
{ | |
for (i = 0; i < blen; i++) | |
a[i] = b[i]; | |
for (i = blen; i < deg; i++) | |
a[i] = 0; | |
} | |
else if (ctx->sparse_modulus) | |
{ | |
nmod_t mod = ctx->mod; | |
for (k = ctx->len - 2; k >= 0; k--) | |
t[k] = mod.n - ctx->a[k]; | |
for (i = blen - 1; i >= d; i--) | |
{ | |
for (k = ctx->len - 2; k >= 0; k--) | |
NMOD_ADDMUL(b[ctx->j[k] + i - d], b[i], t[k], mod); | |
b[i] = 0; | |
} | |
for (i = 0; i < deg; i++) | |
a[i] = b[i]; | |
} | |
else | |
{ | |
/* | |
_nmod_poly_divrem_newton_n_preinv(t, a, b, blen, | |
ctx->modulus->coeffs, ctx->modulus->length, | |
ctx->inv->coeffs, ctx->inv->length, | |
ctx->mod); | |
*/ | |
mp_limb_t * Q = t; | |
mp_limb_t * R = a; | |
const mp_limb_t * A = b; | |
slong lenA = blen; | |
const mp_limb_t * B = ctx->modulus->coeffs; | |
slong lenB = deg + 1; | |
const mp_limb_t * Binv = ctx->inv->coeffs; | |
slong lenBinv = ctx->inv->length; | |
const slong lenQ = lenA - lenB + 1; | |
FLINT_ASSERT(lenBinv > 0); | |
FLINT_ASSERT(lenQ > 0); | |
if (lenQ <= 2) | |
{ | |
if (lenQ == 2) | |
_nmod_poly_divrem_q1(Q, R, A, lenA, B, lenB, ctx->mod); | |
else | |
_nmod_poly_divrem_q0(Q, R, A, B, lenB, ctx->mod); | |
return; | |
} | |
if (deg < 20) | |
{ | |
for (i = 0; i < lenQ; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
j = FLINT_MAX(0, i - lenBinv + 1); | |
umul_ppmm(t1, t0, A[lenA - 1 - j], Binv[i - j]); | |
for (j++; j <= i; j++) | |
MAC3(t2, t1, t0, A[lenA - 1 - j], Binv[i - j]); | |
NMOD_RED3(Q[lenQ - 1 - i], t2, t1, t0, ctx->mod); | |
} | |
for (i = 0; i < deg; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
for (j = FLINT_MAX(0, i - lenQ + 1); j <= i; j++) | |
MAC3(t2, t1, t0, B[j], Q[i - j]); | |
NMOD_RED3(t0, t2, t1, t0, ctx->mod); | |
R[i] = nmod_sub(A[i], t0, ctx->mod); | |
} | |
} | |
else | |
{ | |
mp_ptr Arev = t + d; | |
_nmod_poly_reverse(Arev, A + (lenA - lenQ), lenQ, lenQ); | |
_nmod_poly_mullow(Q, Arev, lenQ, Binv, FLINT_MIN(lenQ, lenBinv), lenQ, ctx->mod); | |
_nmod_poly_reverse(Q, Q, lenQ, lenQ); | |
FLINT_ASSERT(lenB > 1); | |
FLINT_ASSERT(lenQ < lenB - 1); | |
_nmod_poly_mullow(R, B, lenB - 1, Q, lenQ, lenB - 1, ctx->mod); | |
_nmod_vec_sub(R, A, R, lenB - 1, ctx->mod); | |
} | |
} | |
} | |
void _n_fq_madd2( | |
mp_limb_t * a, /* length 2d */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
const fq_nmod_ctx_t ctx, | |
mp_limb_t * t) /* length 2d */ | |
{ | |
slong d = ctx->modulus->length - 1; | |
FLINT_ASSERT(d > 0); | |
if (d < 30) | |
{ | |
slong i, j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
mp_limb_t s2 = 0, s1 = 0, s0 = 0; | |
umul_ppmm(t1, t0, b[i], c[0]); | |
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]); | |
add_ssaaaa(t1, t0, t1, t0, 0, a[i]); | |
add_ssaaaa(s1, s0, s1, s0, 0, a[2*d - 2 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC3(t2, t1, t0, b[i - j], c[0 + j]); | |
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
NMOD_RED3(a[i], t2, t1, t0, ctx->mod); | |
NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod); | |
} | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
umul_ppmm(t1, t0, b[d - 1], c[0]); | |
add_ssaaaa(t1, t0, t1, t0, 0, a[d - 1]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod); | |
} | |
} | |
else | |
{ | |
_nmod_poly_mul(t, b, d, c, d, ctx->mod); | |
_nmod_vec_add(a, a, t, 2*d - 1, ctx->mod); | |
} | |
} | |
void _n_fq_mul2( | |
mp_limb_t * a, /* length 2d */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
FLINT_ASSERT(d > 0); | |
if (d < 30) | |
{ | |
slong i, j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
mp_limb_t s2 = 0, s1 = 0, s0 = 0; | |
umul_ppmm(t1, t0, b[i], c[0]); | |
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC3(t2, t1, t0, b[i - j], c[0 + j]); | |
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
NMOD_RED3(a[i], t2, t1, t0, ctx->mod); | |
NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod); | |
} | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
umul_ppmm(t1, t0, b[d - 1], c[0]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod); | |
} | |
} | |
else | |
{ | |
_nmod_poly_mul(a, b, d, c, d, ctx->mod); | |
} | |
} | |
/**************************** lazy *******************************************/ | |
int _n_fq_dot_lazy_size( | |
slong len, | |
const fq_nmod_ctx_t ctx) | |
{ | |
ulong t[4]; | |
slong d = fq_nmod_ctx_degree(ctx); | |
mp_limb_t p = ctx->mod.n; | |
if (d > 30 || p < 2 || len < 0) | |
return 0; | |
umul_ppmm(t[1], t[0], p - 1, p - 1); | |
t[2] = mpn_mul_1(t, t, 2, d); | |
t[3] = mpn_mul_1(t, t, 3, len); | |
if (t[3] != 0) | |
return 0; | |
if (t[2] != 0) | |
return 3; | |
if (t[1] != 0) | |
return 2; | |
return 1; | |
} | |
void _n_fq_reduce2_lazy1( | |
mp_limb_t * a, /* length 6d, 2d used */ | |
slong d, | |
nmod_t ctx) | |
{ | |
slong i; | |
for (i = 0; i < 2*d - 1; i++) | |
NMOD_RED(a[i], a[i], ctx); | |
} | |
void _n_fq_madd2_lazy1( | |
mp_limb_t * a, /* length 6d, 2d used */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i, j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t0 = 0; | |
mp_limb_t s0 = 0; | |
t0 = a[i + 0]; | |
s0 = a[(2*d - 2 - i) + 0]; | |
t0 += b[i]*c[0]; | |
s0 += b[d - 1]*c[d - 1 - i]; | |
for (j = 1; j <= i; j++) | |
{ | |
t0 += b[i - j]*c[0 + j]; | |
s0 += b[d - 1 - j]*c[d - 1 - i + j]; | |
} | |
a[i + 0] = t0; | |
a[(2*d - 2 - i) + 0] = s0; | |
} | |
{ | |
mp_limb_t t0 = 0; | |
t0 = a[(d - 1) + 0]; | |
t0 += b[d - 1]*c[0]; | |
for (j = 1; j < d; j++) | |
{ | |
t0 += b[d - 1 - j]*c[0 + j]; | |
} | |
a[(d - 1) + 0] = t0; | |
} | |
} | |
void _n_fq_mul2_lazy1( | |
mp_limb_t * a, /* length 6d, 2d used */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i,j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t0 = 0; | |
mp_limb_t s0 = 0; | |
t0 = b[i]*c[0]; | |
s0 = b[d - 1]*c[d - 1 - i]; | |
for (j = 1; j <= i; j++) | |
{ | |
t0 += b[i - j]*c[0 + j]; | |
s0 += b[d - 1 - j]*c[d - 1 - i + j]; | |
} | |
a[i + 0] = t0; | |
a[(2*d - 2 - i) + 0] = s0; | |
} | |
{ | |
mp_limb_t t0 = 0; | |
t0 = b[d - 1]*c[0]; | |
for (j = 1; j < d; j++) | |
{ | |
t0 += b[d - 1 - j]*c[0 + j]; | |
} | |
a[(d - 1) + 0] = t0; | |
} | |
} | |
void _n_fq_reduce2_lazy2( | |
mp_limb_t * a, /* length 6d, 4d used */ | |
slong d, | |
nmod_t ctx) | |
{ | |
slong i; | |
for (i = 0; i < 2*d - 1; i++) | |
NMOD2_RED2(a[i], a[2*i + 1], a[2*i + 0], ctx); | |
} | |
void _n_fq_madd2_lazy2( | |
mp_limb_t * a, /* length 6d, 4d used */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i,j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t1 = 0, t0 = 0; | |
mp_limb_t s1 = 0, s0 = 0; | |
t0 = a[2*i + 0]; | |
t1 = a[2*i + 1]; | |
s0 = a[2*(2*d - 2 - i) + 0]; | |
s1 = a[2*(2*d - 2 - i) + 1]; | |
MAC2(t1, t0, b[i], c[0]); | |
MAC2(s1, s0, b[d - 1], c[d - 1 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC2(t1, t0, b[i - j], c[0 + j]); | |
MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
a[2*i + 0] = t0; | |
a[2*i + 1] = t1; | |
a[2*(2*d - 2 - i) + 0] = s0; | |
a[2*(2*d - 2 - i) + 1] = s1; | |
} | |
{ | |
mp_limb_t t1 = 0, t0 = 0; | |
t0 = a[2*(d - 1) + 0]; | |
t1 = a[2*(d - 1) + 1]; | |
MAC2(t1, t0, b[d - 1], c[0]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC2(t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
a[2*(d - 1) + 0] = t0; | |
a[2*(d - 1) + 1] = t1; | |
} | |
} | |
void _n_fq_mul2_lazy2( | |
mp_limb_t * a, /* length 6d */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i,j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t1 = 0, t0 = 0; | |
mp_limb_t s1 = 0, s0 = 0; | |
umul_ppmm(t1, t0, b[i], c[0]); | |
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC2(t1, t0, b[i - j], c[0 + j]); | |
MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
a[2*i + 0] = t0; | |
a[2*i + 1] = t1; | |
a[2*(2*d - 2 - i) + 0] = s0; | |
a[2*(2*d - 2 - i) + 1] = s1; | |
} | |
{ | |
mp_limb_t t1 = 0, t0 = 0; | |
umul_ppmm(t1, t0, b[d - 1], c[0]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC2(t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
a[2*(d - 1) + 0] = t0; | |
a[2*(d - 1) + 1] = t1; | |
} | |
} | |
void _n_fq_reduce2_lazy3( | |
mp_limb_t * a, /* length 6d */ | |
slong d, | |
nmod_t ctx) | |
{ | |
slong i; | |
for (i = 0; i < 2*d - 1; i++) | |
NMOD_RED3(a[i], a[3*i + 2], a[3*i + 1], a[3*i + 0], ctx); | |
} | |
void _n_fq_madd2_lazy3( | |
mp_limb_t * a, /* length 6d */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i,j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
mp_limb_t s2 = 0, s1 = 0, s0 = 0; | |
t0 = a[3*i + 0]; | |
t1 = a[3*i + 1]; | |
t2 = a[3*i + 2]; | |
s0 = a[3*(2*d - 2 - i) + 0]; | |
s1 = a[3*(2*d - 2 - i) + 1]; | |
s2 = a[3*(2*d - 2 - i) + 2]; | |
MAC3(t2, t1, t0, b[i], c[0]); | |
MAC3(s2, s1, s0, b[d - 1], c[d - 1 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC3(t2, t1, t0, b[i - j], c[0 + j]); | |
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
a[3*i + 0] = t0; | |
a[3*i + 1] = t1; | |
a[3*i + 2] = t2; | |
a[3*(2*d - 2 - i) + 0] = s0; | |
a[3*(2*d - 2 - i) + 1] = s1; | |
a[3*(2*d - 2 - i) + 2] = s2; | |
} | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
t0 = a[3*(d - 1) + 0]; | |
t1 = a[3*(d - 1) + 1]; | |
t2 = a[3*(d - 1) + 2]; | |
MAC3(t2, t1, t0, b[d - 1], c[0]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
a[3*(d - 1) + 0] = t0; | |
a[3*(d - 1) + 1] = t1; | |
a[3*(d - 1) + 2] = t2; | |
} | |
} | |
void _n_fq_mul2_lazy3( | |
mp_limb_t * a, /* length 6d */ | |
const mp_limb_t * b, /* length d */ | |
const mp_limb_t * c, /* length d */ | |
slong d) | |
{ | |
slong i,j; | |
for (i = 0; i + 1 < d; i++) | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
mp_limb_t s2 = 0, s1 = 0, s0 = 0; | |
umul_ppmm(t1, t0, b[i], c[0]); | |
umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]); | |
for (j = 1; j <= i; j++) | |
{ | |
MAC3(t2, t1, t0, b[i - j], c[0 + j]); | |
MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]); | |
} | |
a[3*i + 0] = t0; | |
a[3*i + 1] = t1; | |
a[3*i + 2] = t2; | |
a[3*(2*d - 2 - i) + 0] = s0; | |
a[3*(2*d - 2 - i) + 1] = s1; | |
a[3*(2*d - 2 - i) + 2] = s2; | |
} | |
{ | |
mp_limb_t t2 = 0, t1 = 0, t0 = 0; | |
umul_ppmm(t1, t0, b[d - 1], c[0]); | |
for (j = 1; j < d; j++) | |
{ | |
MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]); | |
} | |
a[3*(d - 1) + 0] = t0; | |
a[3*(d - 1) + 1] = t1; | |
a[3*(d - 1) + 2] = t2; | |
} | |
} | |
void _n_fq_inv( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
const fq_nmod_ctx_t ctx, | |
mp_limb_t * t) /* length d */ | |
{ | |
slong d = ctx->modulus->length - 1; | |
slong blen = d; | |
FLINT_ASSERT(d > 0); | |
while (blen > 0 && b[blen - 1] == 0) | |
blen--; | |
if (blen < 1) | |
{ | |
flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv"); | |
} | |
else if (blen == 1) | |
{ | |
a[0] = n_invmod(b[0], ctx->mod.n); | |
_nmod_vec_zero(a + 1, d - 1); | |
} | |
else | |
{ | |
if (1 != _nmod_poly_gcdinv(t, a, b, blen, ctx->modulus->coeffs, d + 1, ctx->mod)) | |
{ | |
flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv"); | |
} | |
if (t[0] != 1) | |
{ | |
_nmod_vec_scalar_mul_nmod(a, a, d, n_invmod(t[0], ctx->mod.n), ctx->mod); | |
} | |
} | |
} | |
void n_fq_mul( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
const mp_limb_t * c, | |
const fq_nmod_ctx_t ctx) | |
{ | |
fq_nmod_t A, B, C; | |
fq_nmod_init(A, ctx); | |
fq_nmod_init(B, ctx); | |
fq_nmod_init(C, ctx); | |
n_fq_get_fq_nmod(B, b, ctx); | |
n_fq_get_fq_nmod(C, c, ctx); | |
fq_nmod_mul(A, B, C, ctx); | |
n_fq_set_fq_nmod(a, A, ctx); | |
fq_nmod_clear(A, ctx); | |
fq_nmod_clear(B, ctx); | |
fq_nmod_clear(C, ctx); | |
} | |
void n_fq_inv( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
const fq_nmod_ctx_t ctx) | |
{ | |
fq_nmod_t A, B; | |
fq_nmod_init(A, ctx); | |
fq_nmod_init(B, ctx); | |
n_fq_get_fq_nmod(B, b, ctx); | |
fq_nmod_inv(A, B, ctx); | |
n_fq_set_fq_nmod(a, A, ctx); | |
fq_nmod_clear(A, ctx); | |
fq_nmod_clear(B, ctx); | |
} | |
void _n_fq_pow_ui( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
ulong e, | |
const fq_nmod_ctx_t ctx) | |
{ | |
fq_nmod_t A, B; | |
fq_nmod_init(A, ctx); | |
fq_nmod_init(B, ctx); | |
n_fq_get_fq_nmod(B, b, ctx); | |
fq_nmod_pow_ui(A, B, e, ctx); | |
n_fq_set_fq_nmod(a, A, ctx); | |
fq_nmod_clear(A, ctx); | |
fq_nmod_clear(B, ctx); | |
} | |
void n_fq_pow_ui( | |
mp_limb_t * a, | |
const mp_limb_t * b, | |
ulong e, | |
const fq_nmod_ctx_t ctx) | |
{ | |
fq_nmod_t A, B; | |
fq_nmod_init(A, ctx); | |
fq_nmod_init(B, ctx); | |
n_fq_get_fq_nmod(B, b, ctx); | |
fq_nmod_pow_ui(A, B, e, ctx); | |
n_fq_set_fq_nmod(a, A, ctx); | |
fq_nmod_clear(A, ctx); | |
fq_nmod_clear(B, ctx); | |
} | |
int n_fq_is_canonical( | |
const mp_limb_t * a, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong i, d = fq_nmod_ctx_degree(ctx); | |
for (i = 0; i < d; i++) | |
{ | |
if (a[i] >= ctx->mod.n) | |
return 0; | |
} | |
return 1; | |
} | |
#include "fq_nmod_mpoly.h" | |
#include "n_poly.h" | |
#undef _WIN32_WINNT | |
#define _WIN32_WINNT 0x0600 | |
#include <conio.h> | |
#include <windows.h> | |
typedef struct { | |
mp_limb_t * coeffs; | |
ulong * exps; | |
slong length; | |
flint_bitcnt_t bits; /* number of bits per exponent */ | |
slong coeffs_alloc; /* abs size in mp_limb_t units */ | |
slong exps_alloc; /* abs size in ulong units */ | |
} new_fq_nmod_mpoly_struct; | |
typedef new_fq_nmod_mpoly_struct new_fq_nmod_mpoly_t[1]; | |
void new_fq_nmod_mpoly_init( | |
new_fq_nmod_mpoly_t A, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
A->coeffs = NULL; | |
A->exps = NULL; | |
A->length = 0; | |
A->bits = MPOLY_MIN_BITS; | |
A->coeffs_alloc = 0; | |
A->exps_alloc = 0; | |
} | |
void new_fq_nmod_mpoly_clear( | |
new_fq_nmod_mpoly_t A, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
if (A->coeffs_alloc > 0) | |
flint_free(A->coeffs); | |
if (A->exps_alloc > 0) | |
flint_free(A->exps); | |
} | |
void new_fq_nmod_mpoly_swap( | |
new_fq_nmod_mpoly_t A, | |
new_fq_nmod_mpoly_t B, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
new_fq_nmod_mpoly_struct t = *A; | |
*A = *B; | |
*B = t; | |
} | |
int n_fq_fprint_pretty( | |
FILE * file, | |
const mp_limb_t * a, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
slong i; | |
int first; | |
first = 1; | |
for (i = d - 1; i >= 0; i--) | |
{ | |
if (a[i] == 0) | |
continue; | |
if (!first) | |
flint_fprintf(file, "+"); | |
first = 0; | |
flint_fprintf(file, "%wu", a[i]); | |
if (i > 0) | |
{ | |
flint_fprintf(file, "*%s", ctx->var); | |
if (i > 1) | |
flint_fprintf(file, "^%wd", i); | |
} | |
} | |
if (first) | |
flint_fprintf(file, "0"); | |
return 1; | |
} | |
int new_fq_nmod_mpoly_fprint_pretty( | |
FILE * file, | |
const new_fq_nmod_mpoly_t A, | |
const char ** x_in, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx->fqctx); | |
slong len = A->length; | |
ulong * exp = A->exps; | |
slong bits = A->bits; | |
slong i, j, N; | |
fmpz * exponents; | |
int r = 0; | |
char ** x = (char **) x_in; | |
TMP_INIT; | |
if (len == 0) | |
{ | |
r = (EOF != fputc('0', file)); | |
return r; | |
} | |
#define CHECK_r if (r <= 0) goto done; | |
N = mpoly_words_per_exp(bits, ctx->minfo); | |
TMP_START; | |
if (x == NULL) | |
{ | |
x = (char **) TMP_ALLOC(ctx->minfo->nvars*sizeof(char *)); | |
for (i = 0; i < ctx->minfo->nvars; i++) | |
{ | |
x[i] = (char *) TMP_ALLOC(((FLINT_BITS+4)/3)*sizeof(char)); | |
flint_sprintf(x[i], "x%wd", i+1); | |
} | |
} | |
exponents = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(ulong)); | |
for (i = 0; i < ctx->minfo->nvars; i++) | |
fmpz_init(exponents + i); | |
for (i = 0; i < len; i++) | |
{ | |
if (i > 0) | |
{ | |
r = flint_fprintf(file, " + "); | |
CHECK_r | |
} | |
r = flint_fprintf(file, "("); | |
CHECK_r | |
r = n_fq_fprint_pretty(file, A->coeffs + d*i, ctx->fqctx); | |
CHECK_r | |
r = flint_fprintf(file, ")"); | |
CHECK_r | |
mpoly_get_monomial_ffmpz(exponents, exp + N*i, bits, ctx->minfo); | |
for (j = 0; j < ctx->minfo->nvars; j++) | |
{ | |
int cmp = fmpz_cmp_ui(exponents + j, 1); | |
if (cmp > 0) | |
{ | |
r = flint_fprintf(file, "*%s^", x[j]); | |
CHECK_r | |
r = fmpz_fprint(file, exponents + j); | |
CHECK_r | |
} | |
else if (cmp == 0) | |
{ | |
r = flint_fprintf(file, "*%s", x[j]); | |
CHECK_r | |
} | |
} | |
} | |
done: | |
for (i = 0; i < ctx->minfo->nvars; i++) | |
fmpz_clear(exponents + i); | |
TMP_END; | |
return r; | |
} | |
void new_fq_nmod_mpoly_print_pretty( | |
const new_fq_nmod_mpoly_t A, | |
const char ** x, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
new_fq_nmod_mpoly_fprint_pretty(stdout, A, x, ctx); | |
fflush(stdout); | |
} | |
void _new_fq_nmod_mpoly_fit_length( | |
mp_limb_t ** coeffs, | |
slong * coeffs_alloc, | |
slong d, | |
ulong ** exps, | |
slong * exps_alloc, | |
slong N, | |
slong length) | |
{ | |
if (d*length > *coeffs_alloc) | |
{ | |
*coeffs_alloc = FLINT_MAX(d*length, *coeffs_alloc*2); | |
*coeffs = flint_realloc(*coeffs, *coeffs_alloc*sizeof(mp_limb_t)); | |
} | |
if (N*length > *exps_alloc) | |
{ | |
*exps_alloc = FLINT_MAX(N*length, *exps_alloc*2); | |
*exps = flint_realloc(*exps, *exps_alloc*sizeof(ulong)); | |
} | |
} | |
void new_fq_nmod_mpoly_fit_length_set_bits( | |
new_fq_nmod_mpoly_t A, | |
slong length, | |
slong bits, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx->fqctx); | |
slong N = mpoly_words_per_exp(bits, ctx->minfo); | |
_new_fq_nmod_mpoly_fit_length(&A->coeffs, &A->coeffs_alloc, d, | |
&A->exps, &A->exps_alloc, N, length); | |
A->bits = bits; | |
} | |
void new_fq_nmod_mpoly_zero( | |
new_fq_nmod_mpoly_t A, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
A->length = 0; | |
} | |
void new_fq_nmod_mpoly_set_fq_nmod_mpoly( | |
new_fq_nmod_mpoly_t A, | |
const fq_nmod_mpoly_t B, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx->fqctx); | |
slong N = mpoly_words_per_exp(B->bits, ctx->minfo); | |
slong i; | |
new_fq_nmod_mpoly_fit_length_set_bits(A, B->length, B->bits, ctx); | |
mpoly_copy_monomials(A->exps, B->exps, B->length, N); | |
for (i = 0; i < B->length; i++) | |
n_fq_set_fq_nmod(A->coeffs + d*i, B->coeffs + i, ctx->fqctx); | |
A->length = B->length; | |
} | |
void new_fq_nmod_mpoly_get_fq_nmod_mpoly( | |
fq_nmod_mpoly_t A, | |
const new_fq_nmod_mpoly_t B, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx->fqctx); | |
slong N = mpoly_words_per_exp(B->bits, ctx->minfo); | |
slong i; | |
fq_nmod_mpoly_fit_bits(A, B->bits, ctx); | |
A->bits = B->bits; | |
fq_nmod_mpoly_fit_length(A, B->length, ctx); | |
mpoly_copy_monomials(A->exps, B->exps, B->length, N); | |
for (i = 0; i < B->length; i++) | |
n_fq_get_fq_nmod(A->coeffs + i, B->coeffs + d*i, ctx->fqctx); | |
A->length = B->length; | |
} | |
int new_fq_nmod_mpoly_equal( | |
const new_fq_nmod_mpoly_t A, | |
const new_fq_nmod_mpoly_t B, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx->fqctx); | |
if (A == B) | |
return 1; | |
if (A->length != B->length) | |
return 0; | |
if (!_nmod_vec_equal(A->coeffs, B->coeffs, d*B->length)) | |
return 0; | |
return !mpoly_monomials_cmp(A->exps, A->bits, B->exps, B->bits, | |
B->length, ctx->minfo); | |
} | |
void _new_fq_nmod_mpoly_mul_johnson1( | |
new_fq_nmod_mpoly_t A, | |
const mp_limb_t * Bcoeffs, | |
const ulong * Bexps, | |
slong Blen, | |
const mp_limb_t * Ccoeffs, | |
const ulong * Cexps, | |
slong Clen, | |
ulong maskhi, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
slong i, j; | |
slong next_loc; | |
slong heap_len = 2; /* heap zero index unused */ | |
mpoly_heap1_s * heap; | |
mpoly_heap_t * chain; | |
slong * store, * store_base; | |
mpoly_heap_t * x; | |
slong * hind; | |
ulong exp; | |
mp_limb_t * t; | |
int lazy_size = _n_fq_dot_lazy_size(Blen, ctx); | |
mp_limb_t * Acoeffs = A->coeffs; | |
ulong * Aexps = A->exps; | |
slong Acoeffs_alloc = A->coeffs_alloc; | |
slong Aexps_alloc = A->exps_alloc; | |
slong Alen; | |
TMP_INIT; | |
TMP_START; | |
next_loc = Blen + 4; /* something bigger than heap can ever be */ | |
heap = (mpoly_heap1_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap1_s)); | |
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t)); | |
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong)); | |
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong)); | |
t = (mp_limb_t *) TMP_ALLOC(6*d*sizeof(mp_limb_t)); | |
for (i = 0; i < Blen; i++) | |
hind[i] = 1; | |
/* put (0, 0, exp2[0] + exp3[0]) on heap */ | |
x = chain + 0; | |
x->i = 0; | |
x->j = 0; | |
x->next = NULL; | |
HEAP_ASSIGN(heap[1], Bexps[0] + Cexps[0], x); | |
hind[0] = 2*1 + 0; | |
Alen = 0; | |
while (heap_len > 1) | |
{ | |
exp = heap[1].exp; | |
_new_fq_nmod_mpoly_fit_length(&Acoeffs, &Acoeffs_alloc, d, | |
&Aexps, &Aexps_alloc, 1, Alen + 1); | |
Aexps[Alen] = exp; | |
_nmod_vec_zero(t, 6*d); | |
switch (lazy_size) | |
{ | |
case 1: | |
do { | |
x = _mpoly_heap_pop1(heap, &heap_len, maskhi); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy1(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && heap[1].exp == exp); | |
_n_fq_reduce2_lazy1(t, d, ctx->mod); | |
break; | |
case 2: | |
do { | |
x = _mpoly_heap_pop1(heap, &heap_len, maskhi); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy2(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && heap[1].exp == exp); | |
_n_fq_reduce2_lazy2(t, d, ctx->mod); | |
break; | |
case 3: | |
do { | |
x = _mpoly_heap_pop1(heap, &heap_len, maskhi); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy3(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && heap[1].exp == exp); | |
_n_fq_reduce2_lazy3(t, d, ctx->mod); | |
break; | |
default: | |
do { | |
x = _mpoly_heap_pop1(heap, &heap_len, maskhi); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2(t, Bcoeffs + d*x->i, | |
Ccoeffs + d*x->j, ctx, t + 2*d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && heap[1].exp == exp); | |
break; | |
} | |
_n_fq_reduce2(Acoeffs + d*Alen, t, ctx, t + 2*d); | |
Alen += !_n_fq_is_zero(Acoeffs + d*Alen, d); | |
while (store > store_base) | |
{ | |
j = *--store; | |
i = *--store; | |
/* should we go right? */ | |
if ((i + 1 < Blen) && | |
(hind[i + 1] == 2*j + 1) | |
) | |
{ | |
x = chain + i + 1; | |
x->i = i + 1; | |
x->j = j; | |
x->next = NULL; | |
hind[x->i] = 2*(x->j + 1) + 0; | |
_mpoly_heap_insert1(heap, Bexps[x->i] + Cexps[x->j], x, | |
&next_loc, &heap_len, maskhi); | |
} | |
/* should we go up? */ | |
if ((j + 1 < Clen) && | |
((hind[i] & 1) == 1) && | |
((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1)) | |
) | |
{ | |
x = chain + i; | |
x->i = i; | |
x->j = j + 1; | |
x->next = NULL; | |
hind[x->i] = 2*(x->j + 1) + 0; | |
_mpoly_heap_insert1(heap, Bexps[x->i] + Cexps[x->j], x, | |
&next_loc, &heap_len, maskhi); | |
} | |
} | |
} | |
A->coeffs = Acoeffs; | |
A->exps = Aexps; | |
A->coeffs_alloc = Acoeffs_alloc; | |
A->exps_alloc = Aexps_alloc; | |
A->length = Alen; | |
TMP_END; | |
} | |
void _new_fq_nmod_mpoly_mul_johnson( | |
new_fq_nmod_mpoly_t A, | |
const mp_limb_t * Bcoeffs, | |
const ulong * Bexps, | |
slong Blen, | |
const mp_limb_t * Ccoeffs, | |
const ulong * Cexps, | |
slong Clen, | |
flint_bitcnt_t bits, | |
slong N, | |
const ulong * cmpmask, | |
const fq_nmod_ctx_t ctx) | |
{ | |
slong d = fq_nmod_ctx_degree(ctx); | |
slong i, j; | |
slong next_loc; | |
slong heap_len = 2; /* heap zero index unused */ | |
mpoly_heap_s * heap; | |
mpoly_heap_t * chain; | |
slong * store, * store_base; | |
mpoly_heap_t * x; | |
ulong * exp, * exps; | |
ulong ** exp_list; | |
slong exp_next; | |
slong * hind; | |
mp_limb_t * t; | |
int lazy_size = _n_fq_dot_lazy_size(Blen, ctx); | |
mp_limb_t * Acoeffs = A->coeffs; | |
ulong * Aexps = A->exps; | |
slong Acoeffs_alloc = A->coeffs_alloc; | |
slong Aexps_alloc = A->exps_alloc; | |
slong Alen; | |
TMP_INIT; | |
FLINT_ASSERT(Blen > 0); | |
FLINT_ASSERT(Clen > 0); | |
FLINT_ASSERT(A->bits == bits); | |
if (N == 1) | |
{ | |
_new_fq_nmod_mpoly_mul_johnson1(A, Bcoeffs, Bexps, Blen, | |
Ccoeffs, Cexps, Clen, cmpmask[0], ctx); | |
return; | |
} | |
TMP_START; | |
next_loc = Blen + 4; /* something bigger than heap can ever be */ | |
heap = (mpoly_heap_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap_s)); | |
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t)); | |
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong)); | |
exps = (ulong *) TMP_ALLOC(Blen*N*sizeof(ulong)); | |
exp_list = (ulong **) TMP_ALLOC(Blen*sizeof(ulong *)); | |
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong)); | |
t = (mp_limb_t *) TMP_ALLOC(6*d*sizeof(mp_limb_t)); | |
for (i = 0; i < Blen; i++) | |
{ | |
exp_list[i] = exps + i*N; | |
hind[i] = 1; | |
} | |
/* start with no heap nodes and no exponent vectors in use */ | |
exp_next = 0; | |
/* put (0, 0, exp2[0] + exp3[0]) on heap */ | |
x = chain + 0; | |
x->i = 0; | |
x->j = 0; | |
x->next = NULL; | |
heap[1].next = x; | |
heap[1].exp = exp_list[exp_next++]; | |
if (bits <= FLINT_BITS) | |
mpoly_monomial_add(heap[1].exp, Bexps + N*0, Cexps + N*0, N); | |
else | |
mpoly_monomial_add_mp(heap[1].exp, Bexps + N*0, Cexps + N*0, N); | |
hind[0] = 2*1 + 0; | |
Alen = 0; | |
while (heap_len > 1) | |
{ | |
exp = heap[1].exp; | |
_new_fq_nmod_mpoly_fit_length(&Acoeffs, &Acoeffs_alloc, d, | |
&Aexps, &Aexps_alloc, N, Alen + 1); | |
mpoly_monomial_set(Aexps + N*Alen, exp, N); | |
_nmod_vec_zero(t, 6*d); | |
switch (lazy_size) | |
{ | |
case 1: | |
do { | |
exp_list[--exp_next] = heap[1].exp; | |
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy1(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)); | |
_n_fq_reduce2_lazy1(t, d, ctx->mod); | |
break; | |
case 2: | |
do { | |
exp_list[--exp_next] = heap[1].exp; | |
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy2(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)); | |
_n_fq_reduce2_lazy2(t, d, ctx->mod); | |
break; | |
case 3: | |
do { | |
exp_list[--exp_next] = heap[1].exp; | |
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2_lazy3(t, Bcoeffs + d*x->i, Ccoeffs + d*x->j, d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)); | |
_n_fq_reduce2_lazy3(t, d, ctx->mod); | |
break; | |
default: | |
do { | |
exp_list[--exp_next] = heap[1].exp; | |
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); | |
do { | |
hind[x->i] |= WORD(1); | |
*store++ = x->i; | |
*store++ = x->j; | |
_n_fq_madd2(t, Bcoeffs + d*x->i, | |
Ccoeffs + d*x->j, ctx, t + 2*d); | |
} while ((x = x->next) != NULL); | |
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)); | |
break; | |
} | |
_n_fq_reduce2(Acoeffs + d*Alen, t, ctx, t + 2*d); | |
Alen += !_n_fq_is_zero(Acoeffs + d*Alen, d); | |
while (store > store_base) | |
{ | |
j = *--store; | |
i = *--store; | |
/* should we go right? */ | |
if ((i + 1 < Blen) && | |
(hind[i + 1] == 2*j + 1) | |
) | |
{ | |
x = chain + i + 1; | |
x->i = i + 1; | |
x->j = j; | |
x->next = NULL; | |
hind[x->i] = 2*(x->j + 1) + 0; | |
if (bits <= FLINT_BITS) | |
mpoly_monomial_add(exp_list[exp_next], Bexps + x->i*N, | |
Cexps + x->j*N, N); | |
else | |
mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N, | |
Cexps + x->j*N, N); | |
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, | |
&next_loc, &heap_len, N, cmpmask); | |
} | |
/* should we go up? */ | |
if ((j + 1 < Clen) && | |
((hind[i] & 1) == 1) && | |
((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1)) | |
) | |
{ | |
x = chain + i; | |
x->i = i; | |
x->j = j + 1; | |
x->next = NULL; | |
hind[x->i] = 2*(x->j + 1) + 0; | |
if (bits <= FLINT_BITS) | |
mpoly_monomial_add(exp_list[exp_next], Bexps + x->i*N, | |
Cexps + x->j*N, N); | |
else | |
mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N, | |
Cexps + x->j*N, N); | |
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, | |
&next_loc, &heap_len, N, cmpmask); | |
} | |
} | |
} | |
A->coeffs = Acoeffs; | |
A->exps = Aexps; | |
A->coeffs_alloc = Acoeffs_alloc; | |
A->exps_alloc = Aexps_alloc; | |
A->length = Alen; | |
TMP_END; | |
} | |
void new_fq_nmod_mpoly_mul_johnson( | |
new_fq_nmod_mpoly_t A, | |
const new_fq_nmod_mpoly_t B, | |
const new_fq_nmod_mpoly_t C, | |
const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
slong i, N, Alen; | |
flint_bitcnt_t Abits; | |
fmpz * Bmaxfields, * Cmaxfields; | |
ulong * cmpmask; | |
ulong * Bexps = B->exps, * Cexps = C->exps; | |
int freeBexps = 0, freeCexps = 0; | |
new_fq_nmod_mpoly_struct * P, T[1]; | |
TMP_INIT; | |
if (B->length < 1 || C->length < 1) | |
{ | |
new_fq_nmod_mpoly_zero(A, ctx); | |
return; | |
} | |
TMP_START; | |
Bmaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz)); | |
Cmaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz)); | |
for (i = 0; i < ctx->minfo->nfields; i++) | |
{ | |
fmpz_init(Bmaxfields + i); | |
fmpz_init(Cmaxfields + i); | |
} | |
mpoly_max_fields_fmpz(Bmaxfields, Bexps, B->length, B->bits, ctx->minfo); | |
mpoly_max_fields_fmpz(Cmaxfields, Cexps, C->length, C->bits, ctx->minfo); | |
_fmpz_vec_add(Bmaxfields, Bmaxfields, Cmaxfields, ctx->minfo->nfields); | |
Abits = _fmpz_vec_max_bits(Bmaxfields, ctx->minfo->nfields); | |
Abits = FLINT_MAX(MPOLY_MIN_BITS, Abits + 1); | |
Abits = FLINT_MAX(Abits, B->bits); | |
Abits = FLINT_MAX(Abits, C->bits); | |
Abits = mpoly_fix_bits(Abits, ctx->minfo); | |
for (i = 0; i < ctx->minfo->nfields; i++) | |
{ | |
fmpz_clear(Bmaxfields + i); | |
fmpz_clear(Cmaxfields + i); | |
} | |
N = mpoly_words_per_exp(Abits, ctx->minfo); | |
cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong)); | |
mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo); | |
/* ensure input exponents are packed into same sized fields as output */ | |
if (Abits != B->bits) | |
{ | |
freeBexps = 1; | |
Bexps = (ulong *) flint_malloc(N*B->length*sizeof(ulong)); | |
mpoly_repack_monomials(Bexps, Abits, B->exps, B->bits, B->length, ctx->minfo); | |
} | |
if (Abits != C->bits) | |
{ | |
freeCexps = 1; | |
Cexps = (ulong *) flint_malloc(N*C->length*sizeof(ulong)); | |
mpoly_repack_monomials(Cexps, Abits, C->exps, C->bits, C->length, ctx->minfo); | |
} | |
if (A == B || A == C) | |
{ | |
new_fq_nmod_mpoly_init(T, ctx); | |
P = T; | |
} | |
else | |
{ | |
P = A; | |
} | |
new_fq_nmod_mpoly_fit_length_set_bits(P, B->length + C->length, Abits, ctx); | |
if (B->length > C->length) | |
{ | |
_new_fq_nmod_mpoly_mul_johnson(P, C->coeffs, Cexps, C->length, | |
B->coeffs, Bexps, B->length, Abits, N, cmpmask, ctx->fqctx); | |
} | |
else | |
{ | |
_new_fq_nmod_mpoly_mul_johnson(P, B->coeffs, Bexps, B->length, | |
C->coeffs, Cexps, C->length, Abits, N, cmpmask, ctx->fqctx); | |
} | |
if (A == B || A == C) | |
{ | |
new_fq_nmod_mpoly_swap(A, T, ctx); | |
new_fq_nmod_mpoly_clear(T, ctx); | |
} | |
if (freeBexps) | |
flint_free(Bexps); | |
if (freeCexps) | |
flint_free(Cexps); | |
TMP_END; | |
} | |
void fq_nmod_mpoly_mul(fq_nmod_mpoly_t A, const fq_nmod_mpoly_t B, | |
const fq_nmod_mpoly_t C, const fq_nmod_mpoly_ctx_t ctx) | |
{ | |
new_fq_nmod_mpoly_t a, b, c, d; | |
ulong t1, t2; | |
new_fq_nmod_mpoly_init(a, ctx); | |
new_fq_nmod_mpoly_init(b, ctx); | |
new_fq_nmod_mpoly_init(c, ctx); | |
new_fq_nmod_mpoly_init(d, ctx); | |
new_fq_nmod_mpoly_set_fq_nmod_mpoly(a, A, ctx); | |
new_fq_nmod_mpoly_set_fq_nmod_mpoly(b, B, ctx); | |
new_fq_nmod_mpoly_set_fq_nmod_mpoly(c, C, ctx); | |
t1 = GetTickCount64(); | |
new_fq_nmod_mpoly_mul_johnson(a, b, c, ctx); | |
t1 = GetTickCount64() - t1; | |
t2 = GetTickCount64(); | |
fq_nmod_mpoly_mul_johnson(A, B, C, ctx); | |
t2 = GetTickCount64() - t2; | |
flint_printf("new: %wu old: %wu ratio: %f\n", t1, t2, (double) t2 / (double) t1); | |
new_fq_nmod_mpoly_set_fq_nmod_mpoly(d, A, ctx); | |
if (!new_fq_nmod_mpoly_equal(a, d, ctx)) | |
{ | |
flint_printf("oops\n"); | |
flint_abort(); | |
} | |
new_fq_nmod_mpoly_clear(a, ctx); | |
new_fq_nmod_mpoly_clear(b, ctx); | |
new_fq_nmod_mpoly_clear(c, ctx); | |
new_fq_nmod_mpoly_clear(d, ctx); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment