Skip to content

Instantly share code, notes, and snippets.

@fredrik-johansson
Created March 19, 2024 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fredrik-johansson/0da150e1ddab53649ec2085a468dd753 to your computer and use it in GitHub Desktop.
Save fredrik-johansson/0da150e1ddab53649ec2085a468dd753 to your computer and use it in GitHub Desktop.
#include "gr.h"
#include "arf.h"
#include "mpn_extras.h"
#include "crt_helpers.h"
typedef struct
{
slong sign;
slong exp;
mp_limb_t d[4];
}
mpf4_struct;
typedef mpf4_struct mpf4_t[1];
typedef mpf4_struct * mpf4_ptr;
typedef const mpf4_struct * mpf4_srcptr;
int
_mpf4_set(mpf4_t res, const mpf4_t x, gr_ctx_t ctx)
{
*res = *x;
return GR_SUCCESS;
}
int
_mpf4_zero(mpf4_t res, gr_ctx_t ctx)
{
res->sign = 0;
res->exp = 0;
res->d[0] = 0;
res->d[1] = 0;
res->d[2] = 0;
res->d[3] = 0;
return GR_SUCCESS;
}
int
_mpf4_one(mpf4_t res, gr_ctx_t ctx)
{
res->sign = 1;
res->exp = 1;
res->d[0] = 0;
res->d[1] = 0;
res->d[2] = 0;
res->d[3] = UWORD(1) << (FLINT_BITS - 1);
return GR_SUCCESS;
}
int
_mpf4_neg_one(mpf4_t res, gr_ctx_t ctx)
{
res->sign = -1;
res->exp = 1;
res->d[0] = 0;
res->d[1] = 0;
res->d[2] = 0;
res->d[3] = UWORD(1) << (FLINT_BITS - 1);
return GR_SUCCESS;
}
/* todo: overflow/underflow */
int
_mpf4_set_arf(mpf4_t res, const arf_t x, gr_ctx_t ctx)
{
if (arf_is_special(x))
{
if (arf_is_zero(x))
{
return _mpf4_zero(res, ctx);
}
else
{
return GR_DOMAIN;
}
}
else
{
mp_srcptr xp;
mp_size_t xn;
ARF_GET_MPN_READONLY(xp, xn, x);
res->d[3] = xp[xn - 1];
res->d[2] = (xn >= 2) ? xp[xn - 2] : 0;
res->d[1] = (xn >= 3) ? xp[xn - 3] : 0;
res->d[0] = (xn >= 4) ? xp[xn - 4] : 0;
res->sign = ARF_SGNBIT(x) ? -1 : 1;
res->exp = ARF_EXP(x);
return GR_SUCCESS;
}
}
int
_mpf4_get_arf(arf_t res, const mpf4_t x, gr_ctx_t ctx)
{
slong shift;
if (x->sign == 0)
{
arf_zero(res);
}
else
{
_arf_set_round_mpn(res, &shift, x->d, 4, x->sign < 0, 256, ARF_RND_DOWN);
fmpz_set_si(ARF_EXPREF(res), x->exp + shift);
}
return GR_SUCCESS;
}
__attribute__((noinline)) int
_mpf4_add(mpf4_ptr res, mpf4_srcptr x, mpf4_srcptr y, gr_ctx_t ctx)
{
slong xexp, yexp, delta;
slong xsign, ysign;
if (x->sign == 0)
return _mpf4_set(res, y, ctx);
if (y->sign == 0)
return _mpf4_set(res, x, ctx);
xexp = x->exp;
yexp = y->exp;
if (xexp < yexp)
{
FLINT_SWAP(mpf4_srcptr, x, y);
FLINT_SWAP(slong, xexp, yexp);
}
xsign = x->sign;
ysign = y->sign;
mp_limb_t t0, t1, t2, t3, t4, u0, u1, u2, u3, u4;
delta = xexp - yexp;
if (xsign == ysign)
{
t0 = x->d[0];
t1 = x->d[1];
t2 = x->d[2];
t3 = x->d[3];
u0 = y->d[0];
u1 = y->d[1];
u2 = y->d[2];
u3 = y->d[3];
if (delta < FLINT_BITS)
{
if (delta != 0)
{
u0 = (u0 >> delta) | (u1 << (FLINT_BITS - delta));
u1 = (u1 >> delta) | (u2 << (FLINT_BITS - delta));
u2 = (u2 >> delta) | (u3 << (FLINT_BITS - delta));
u3 = (u3 >> delta);
}
}
else
{
if (delta < 2 * FLINT_BITS)
{
if (delta == FLINT_BITS)
{
u0 = u1;
u1 = u2;
u2 = u3;
}
else
{
u0 = (u1 >> delta) | (u2 << (FLINT_BITS - delta));
u1 = (u2 >> delta) | (u3 << (FLINT_BITS - delta));
u2 = (u3 >> delta);
}
u3 = 0;
}
else if (delta < 3 * FLINT_BITS)
{
if (delta == 2 * FLINT_BITS)
{
u0 = u2;
u1 = u3;
}
else
{
u0 = (u2 >> delta) | (u3 << (FLINT_BITS - delta));
u1 = (u3 >> delta);
}
u2 = 0;
u3 = 0;
}
else if (delta < 4 * FLINT_BITS)
{
if (delta == 3 * FLINT_BITS)
u0 = u3;
else
u0 = (u3 >> delta);
u1 = 0;
u2 = 0;
u3 = 0;
}
else
{
goto write_noshift;
}
}
add_sssssaaaaaaaaaa(t4, t3, t2, t1, t0, 0, t3, t2, t1, t0, 0, u3, u2, u1, u0);
if (t4 == 0)
{
write_noshift:
res->sign = xsign;
res->exp = xexp;
res->d[0] = t0;
res->d[1] = t1;
res->d[2] = t2;
res->d[3] = t3;
}
else
{
res->sign = xsign;
res->exp = xexp + 1;
res->d[0] = (t0 >> 1) | (t1 << (FLINT_BITS - 1));
res->d[1] = (t1 >> 1) | (t2 << (FLINT_BITS - 1));
res->d[2] = (t2 >> 1) | (t3 << (FLINT_BITS - 1));
res->d[3] = (t3 >> 1) | (UWORD(1) << (FLINT_BITS - 1));
}
}
else
{
t0 = 0;
t1 = x->d[0];
t2 = x->d[1];
t3 = x->d[2];
t4 = x->d[3];
u0 = 0;
u1 = y->d[0];
u2 = y->d[1];
u3 = y->d[2];
u4 = y->d[3];
if (delta == 0)
{
int cmp;
if (t3 > u3)
cmp = 1;
else if (t3 < u3)
cmp = -1;
else
/* todo: ensure return is 1 or -1 */
cmp = mpn_cmp(x->d, y->d, 3);
if (cmp == 0)
return _mpf4_zero(res, ctx);
if (cmp > 1)
sub_ddddmmmmssss(t3, t2, t1, t0, t3, t2, t1, t0, u3, u2, u1, u0);
else
sub_ddddmmmmssss(t3, t2, t1, t0, u3, u2, u1, u0, t3, t2, t1, t0);
if (t3 < (UWORD(1) << (FLINT_BITS - 2)))
{
/* todo */
flint_abort();
}
res->sign = xsign;
res->exp = xexp + 1;
res->d[0] = (t0 >> 1) | (t1 << (FLINT_BITS - 1));
res->d[1] = (t1 >> 1) | (t2 << (FLINT_BITS - 1));
res->d[2] = (t2 >> 1) | (t3 << (FLINT_BITS - 1));
res->d[3] = (t3 >> 1) | (UWORD(1) << (FLINT_BITS - 1));
}
else if (delta < FLINT_BITS)
{
u0 = (u1 << (FLINT_BITS - delta));
u1 = (u1 >> delta) | (u2 << (FLINT_BITS - delta));
u2 = (u2 >> delta) | (u3 << (FLINT_BITS - delta));
u3 = (u3 >> delta) | (u4 << (FLINT_BITS - delta));
u4 = (u4 >> delta);
}
else
{
flint_abort();
}
sub_dddddmmmmmsssss(t4, t3, t2, t1, t0, t4, t3, t2, t1, t0, u4, u3, u2, u1, u0);
if (t4 == 0)
{
/* todo */
flint_abort();
}
if (t4 >> (FLINT_BITS - 1))
{
res->sign = xsign;
res->exp = xexp;
res->d[0] = t1;
res->d[1] = t2;
res->d[2] = t3;
res->d[3] = t4;
}
else
{
slong shift = flint_clz(t4);
res->sign = xsign;
res->exp = xexp - shift;
res->d[0] = (t1 << shift) | (t0 >> (FLINT_BITS - shift));
res->d[1] = (t2 << shift) | (t1 >> (FLINT_BITS - shift));
res->d[2] = (t3 << shift) | (t2 >> (FLINT_BITS - shift));
res->d[3] = (t4 << shift) | (t3 >> (FLINT_BITS - shift));
}
}
return GR_SUCCESS;
}
mp_limb_pair_t flint_mpn_mulhigh_normalised_4(mp_ptr, mp_srcptr, mp_srcptr);
__attribute__((noinline)) int
_mpf4_mul(mpf4_ptr res, mpf4_srcptr x, mpf4_srcptr y, gr_ctx_t ctx)
{
slong xexp, yexp;
slong xsign, ysign;
mp_limb_pair_t ret;
xsign = x->sign;
ysign = y->sign;
if (xsign == 0 || ysign == 0)
return _mpf4_zero(res, ctx);
xexp = x->exp;
yexp = y->exp;
ret = flint_mpn_mulhigh_normalised(res->d, x->d, y->d, 4);
res->exp = xexp + yexp - (slong) ret.m2;
res->sign = x->sign * y->sign;
return GR_SUCCESS;
}
#include "profiler.h"
int main()
{
arf_t a, b, c, d;
mpf4_t x, y, z;
arf_init(a);
arf_init(b);
arf_init(c);
arf_init(d);
arf_sqrt_ui(a, 2, 256, ARF_RND_DOWN);
arf_sqrt_ui(b, 3, 256, ARF_RND_DOWN);
_mpf4_set_arf(x, a, NULL);
_mpf4_set_arf(y, b, NULL);
slong i;
TIMEIT_START
arf_set(c, a);
for (i = 0; i < 1000; i++)
arf_add(c, c, b, 256, ARF_RND_DOWN);
TIMEIT_STOP
TIMEIT_START
_mpf4_set(z, x, NULL);
for (i = 0; i < 1000; i++)
_mpf4_add(z, z, y, NULL);
TIMEIT_STOP
_mpf4_get_arf(d, z, NULL);
arf_printd(c, 76); flint_printf("\n");
arf_printd(d, 76); flint_printf("\n");
_mpf4_set_arf(x, a, NULL);
_mpf4_set_arf(y, b, NULL);
TIMEIT_START
arf_set(c, a);
for (i = 0; i < 1000; i++)
arf_mul(c, c, b, 256, ARF_RND_DOWN);
TIMEIT_STOP
TIMEIT_START
_mpf4_set(z, x, NULL);
for (i = 0; i < 1000; i++)
_mpf4_mul(z, z, y, NULL);
TIMEIT_STOP
_mpf4_get_arf(d, z, NULL);
arf_printd(c, 76); flint_printf("\n");
arf_printd(d, 76); flint_printf("\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment