Skip to content

Instantly share code, notes, and snippets.

@pv
Last active August 29, 2015 14:26
Show Gist options
  • Save pv/fe407dee3170a7e2243a to your computer and use it in GitHub Desktop.
Save pv/fe407dee3170a7e2243a to your computer and use it in GitHub Desktop.
*~
*.so
*.pyc
*.pyxbldc
/_slds.c
/build
cimport libc.stdlib
cimport cython
cimport numpy as np
cdef extern from "slds.h":
ctypedef long slds_int_t
ctypedef struct slds_entry_t:
slds_int_t a
slds_int_t lb
slds_int_t ub
unsigned int j
ctypedef enum slds_status_t:
FEASIBLE
INFEASIBLE
TOO_HARD
INTEGER_OVERFLOW
ERROR
slds_status_t _slds "slds" (unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x)
void slds_sort(unsigned int n, slds_entry_t *E)
class TooHardException(Exception):
pass
@cython.embedsignature(True)
def slds(A, L, U, b, max_work=None):
"""
Solve bounded Diophantine equation
The problem considered is::
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
L[i] <= x[i] <= U[i]
A[i] > 0
Solve via depth-first Euclid's algorithm, as explained in [1]
Parameters
----------
A, L, U : list of int
Problem description, lists of integers of equal length
b : int
Right-hand side
max_work : int, optional
Maximum number of cases to consider.
Default: no upper limit.
Returns
-------
x : list of int or None
List of integers describing one solution, or None if the problem
is infeasible.
Raises
------
TooHardException
If max_work is given, raised when solution requires too much work.
OverflowError
If integer overflow prevented completing the calculation.
ValueError
If invalid input parameters
References
----------
.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
a System of Linear Diophantine Equations with Bounded Variables''.
Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
182-192 (2006). doi:10.1007/11792086_14
"""
cdef ssize_t j, n
cdef slds_entry_t *E
cdef slds_int_t d
cdef slds_int_t *x
cdef slds_status_t res
if max_work is None:
max_work = 0
n = len(A)
if n < 0 or len(L) != n or len(U) != n:
raise ValueError("Invalid lengths of A, L, U")
E = <slds_entry_t*>libc.stdlib.calloc(n, sizeof(slds_entry_t))
x = <slds_int_t*>libc.stdlib.calloc(n, sizeof(slds_int_t))
try:
if E == NULL or x == NULL:
raise MemoryError()
for j in range(n):
E[j].a = A[j]
E[j].lb = L[j]
E[j].ub = U[j]
E[j].j = j
slds_sort(n, E)
res = _slds(n, E, b, max_work, x)
if res == FEASIBLE:
x_list = [0]*n
for j in range(n):
x_list[E[j].j] = x[j]
return x_list
elif res == INFEASIBLE:
return None
elif res == TOO_HARD:
raise TooHardException()
elif res == INTEGER_OVERFLOW:
raise OverflowError()
else:
raise ValueError("Invalid inputs")
finally:
libc.stdlib.free(E)
libc.stdlib.free(x)
def may_share_memory(np.ndarray a_arr, np.ndarray b_arr, max_work=None):
base_a = int(<Py_ssize_t>a_arr.data)
base_b = int(<Py_ssize_t>b_arr.data)
itemsize_a = int(a_arr.dtype.itemsize)
itemsize_b = int(b_arr.dtype.itemsize)
shape_a = [a_arr.shape[j] for j in range(a_arr.ndim)] + [itemsize_a]
shape_b = [b_arr.shape[j] for j in range(b_arr.ndim)] + [itemsize_b]
stride_a = [a_arr.strides[j] for j in range(a_arr.ndim)] + [1]
stride_b = [b_arr.strides[j] for j in range(b_arr.ndim)] + [1]
# Ensure non-empty
if min(shape_a) == 0 or min(shape_b) == 0:
return False
# Drop zero strides and unit dimensions
shape_a2 = []
stride_a2 = []
for n, s in zip(shape_a, stride_a):
if n != 1 and s != 0:
shape_a2.append(n)
stride_a2.append(s)
shape_a = shape_a2
stride_a = stride_a2
shape_b2 = []
stride_b2 = []
for n, s in zip(shape_b, stride_b):
if n != 1 and s != 0:
shape_b2.append(n)
stride_b2.append(s)
shape_b = shape_b2
stride_b = stride_b2
# Both are scalars
if len(shape_a) == 0 and len(shape_b) == 0:
return base_a == base_b
# Make a strides positive
for j, s in enumerate(stride_a):
if s < 0:
stride_a[j] = -s
base_a += (shape_a[j] - 1) * s
# Make b strides negative
for j, s in enumerate(stride_b):
if s > 0:
stride_b[j] = -s
base_b += (shape_b[j] - 1) * s
# Diophantine form
A = stride_a + [-s for s in stride_b]
N = shape_a + shape_b
d = base_b - base_a
# Sort and combine equal coefficients
A2 = []
N2 = []
last_a = 0
for a, n in sorted(zip(A, N)):
if a == last_a:
N2[-1] += n - 1
else:
last_a = a
A2.append(a)
N2.append(n)
A = A2
N = N2
# Solve
L = [0]*len(A)
U = [n-1 for n in N]
try:
return slds(A, L, U, d, max_work) is not None
except TooHardException:
return True
# -*-python-*-
import os
def make_ext(modname, pyxfilename):
from distutils.extension import Extension
pth = os.path.abspath(os.path.dirname(pyxfilename))
return Extension(name='_slds',
sources=[pyxfilename, os.path.join(pth, 'slds.c')],
include_dirs=[pth])
from __future__ import division, absolute_import, print_function
import itertools
import random
import timeit
import time
from fractions import gcd
from six.moves import xrange
# XXX: just a quick try, didn't yet check if it's fully correct
def maybe_feasible(A, N, d, f):
"""
Decide whether a bounded Diophantine equation might have a solution.
The problem considered is::
Given integers {A}, {N}, d, f, with 0 < A[0] < A[1] < ... < A[n-1]
and N[i] > 0, are there integers x[0], ..., x[n-1] such that
d <= A[0]*x[0] + ... + A[n-1]*x[n-1] <= f
0 <= x[i] < N[i]
Parameters
----------
A : list of int
N : list of int
d : int
Returns
-------
maybe_feasible : bool
If True, the problem might or might not have solution.
If False, the problem does not have solution.
"""
assert A[0] > 0
assert all(a < b for a, b in zip(A[:-1], A[1:]))
assert all(n > 0 for n in N)
# Simple bounds checks
if f < 0:
return False
if sum(a*(n-1) for a, n in zip(A, N)) < d:
return False
if d <= 0:
return True
# Check gcd
A_gcd = A[0]
for a in A[1:]:
A_gcd = gcd(a, A_gcd)
if not any_divides(d, f, A_gcd):
return False
# Consider feasibility of remaining range
if len(A) > 1:
return maybe_feasible(A[1:], N[1:], d - A[0]*(N[0]-1), f)
else:
return True
def any_divides(a, b, x):
"""
any([y % x == 0 for y in range(a, b+1)])
"""
assert x > 0 and b >= a and a >= 0
r = a % x
if r == 0:
return True
return (b - a + r) >= x
def dfs_feasible(A, N, d, count=None):
"""
Same as maybe_feasible but solve exactly (with f = d)
and return found solution.
"""
assert all(n > 0 for n in N)
assert all(a > 0 for a in A)
if count is not None:
count[0] += 1
if d < 0:
return None
if len(A) == 1:
k, r = divmod(d, A[0])
if r != 0 or k >= N[0]:
return None
return (k,)
else:
# Check gcd
A_gcd = A[0]
for a in A[1:]:
A_gcd = gcd(a, A_gcd)
if d % A_gcd != 0:
return None
# Iterate over candidates
j_max = min(d // A[-1], N[-1] - 1)
j_min = max(0, (d - sum(a*(n-1) for a, n in zip(A[:-1], N[:-1]))) // A[-1])
for j in xrange(j_max, j_min-1, -1):
v = dfs_feasible(A[:-1], N[:-1], d - j*A[-1], count)
if v:
return v + (j,)
return None
def may_share_memory(base_a, shape_a, stride_a,
base_b, shape_b, stride_b):
shape_a = list(shape_a)
shape_b = list(shape_b)
stride_a = list(stride_a)
stride_b = list(stride_b)
# Ensure non-empty
if min(shape_a) == 0 or min(shape_b) == 0:
return False
# Drop zero strides and unit dimensions
shape_a2 = []
stride_a2 = []
for n, s in zip(shape_a, stride_a):
if n != 1 and s != 0:
shape_a2.append(n)
stride_a2.append(s)
shape_a = shape_a2
stride_a = stride_a2
shape_b2 = []
stride_b2 = []
for n, s in zip(shape_b, stride_b):
if n != 1 and s != 0:
shape_b2.append(n)
stride_b2.append(s)
shape_b = shape_b2
stride_b = stride_b2
# Both are scalars
if len(shape_a) == 0 and len(shape_b) == 0:
return base_a == base_b
# Make a strides positive
for j, s in enumerate(stride_a):
if s < 0:
stride_a[j] = -s
base_a += (shape_a[j] - 1) * s
# Make b strides negative
for j, s in enumerate(stride_b):
if s > 0:
stride_b[j] = -s
base_b += (shape_b[j] - 1) * s
# Diophantine form
A = stride_a + [-s for s in stride_b]
N = shape_a + shape_b
d = base_b - base_a
# Sort and combine equal coefficients
A2 = []
N2 = []
last_a = 0
for a, n in sorted(zip(A, N)):
if a == last_a:
N2[-1] += n - 1
else:
last_a = a
A2.append(a)
N2.append(n)
A = A2
N = N2
# Heuristics
maybe = maybe_feasible(A, N, d, d)
# Exact (brute-force, for checking only)
exact = any(sum(v) == d for v in itertools.product(*(range(0, a*n, a) for a, n in zip(A, N))))
if not maybe:
assert not exact
dfs = dfs_feasible(A, N, d)
assert bool(dfs) == exact, (dfs, exact)
assert dfs is None or sum(a*x for a, x in zip(A, dfs)) == d
print(maybe, exact, dfs)
return maybe
def test():
import numpy as np
x = np.zeros((5, 5, 4), dtype=np.int8)
a = x[:,:3,:]
b = x[:,3:,:]
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
b.__array_interface__['data'][0], b.shape, b.strides)
b = b.reshape(5, 4, 2).T
assert b.base is x
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
b.__array_interface__['data'][0], b.shape, b.strides)
b = b[1:,1:,1:]
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
b.__array_interface__['data'][0], b.shape, b.strides)
b = x[:,2:,:]
assert may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
b.__array_interface__['data'][0], b.shape, b.strides)
b = b.copy()
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
b.__array_interface__['data'][0], b.shape, b.strides)
def test_dfs():
# Try to get some idea of worst case search depth
A = sorted(set(random.randint(990, 1100) for j in xrange(3)))
N = [10000//a for a in A]
print("A =", A, "N = ", N)
d_ub = sum(a*(n-1) for a, n in zip(A, N))
print("d_ub =", d_ub)
c_max = 0
d_max = None
x_max = None
for d in xrange(2*d_ub):
count = [0]
x = dfs_feasible(A, N, d, count)
maybe = maybe_feasible(A, N, d, d)
if not maybe:
assert x is None
if count[0] > c_max:
c_max = count[0]
d_max = d
x_max = x
print("count = {}, d = {}, x = {}".format(c_max, d_max, x_max))
def test_any_divides():
for a in xrange(0, 17):
for b in xrange(a, a + 31):
for x in xrange(1, 17):
assert any_divides(a, b, x) == any([y % x == 0 for y in xrange(a, b+1)])
def euclid(a1, a2):
"""
Euclid's algorithm
Returns
-------
a_gcd, gamma, epsilon
GCD and numbers such that ``gamma*a1 + epsilon*a2 == a_gcd``.
Solutions to ``a1*x1 + a2*x2 == b`` are generated by
``(gamma*b//a_gcd + a2*t//a_gcd, epsilon*b//a_gcd - a1*t//a_gcd) for t in Z``.
"""
assert a1 > 0 and a2 > 0
gamma = 1
delta = 0
epsilon = 0
eta = 1
while True:
if a2 > 0:
r = a1//a2
a1 -= r*a2
gamma -= r*delta
epsilon -= r*eta
elif a2 == 0:
return a1, gamma, epsilon
if a1 > 0:
r = a2//a1
a2 -= r*a1
delta -= r*gamma
eta -= r*epsilon
elif a1 == 0:
return a2, delta, eta
def _ceildiv(a, b):
v, r = divmod(a, b)
if r != 0:
v += 1
return v
def test_ramachandran():
A_ub = 12345
N_ub = 12345
for n in xrange(2, 64):
print("n =", n)
max_count = 0
max_count_args = None
for k in xrange(10):
A = sorted([random.randint(1, A_ub) for j in xrange(n)])
N = [random.randint(1, N_ub) for j in xrange(n)]
d_ub = sum(a*(n-1) for a, n in zip(A, N))
for j in range(1000):
d = random.randint(0, d_ub + 2)
count = [0]
r = ramachandran(A, N, d, count)
import _slds
r2 = _slds.slds(list(A), [0]*len(A), [qq-1 for qq in N], d)
assert (r is None and r2 is None) or tuple(r) == tuple(r2), (r, r2)
if r is None:
assert dfs_feasible(A, N, d) is None, (A, N, d, dfs_feasible(A, N, d))
if count[0] > max_count:
max_count = count[0]
max_count_args = (A, N, d)
# Time it
timing = min(timeit.repeat(lambda: ramachandran(*max_count_args), repeat=3, number=1))
timing_dfs = min(timeit.repeat(lambda: dfs_feasible(*max_count_args), repeat=3, number=1))
print("worst case: {0}, timing: {1}s [dfs_feasible: {2}s)".format(max_count_args, timing, timing_dfs))
def test_slds():
import _slds
A_ub = 12345
N_ub = 12345
for n in xrange(1, 64):
print("n =", n)
max_time = 0
max_time_args = None
for k in xrange(1000):
A = [random.randint(1, A_ub) for j in xrange(n-1)] + [1]
U = [random.randint(0, N_ub) for j in xrange(n-1)] + [16]
L = [0]*n
d_ub = sum(a*n for a, n in zip(A, U))
for j in range(1000):
d = random.randint(0, d_ub + 2)
start = time.time()
r = _slds.slds(A, L, U, d)
timing = time.time() - start
if r is not None:
assert sum(a*x for a, x in zip(A, r)) == d
assert all(lb <= x <= ub for lb, x, ub in zip(L, r, U))
if timing > max_time:
max_time = timing
max_time_args = (A, L, U, d)
print("worst case: {0} ms, {1}".format(max_time*1e3, max_time_args))
def ramachandran(A, N, d, count=None):
"""
Solve bounded Diophantine equation
The problem considered is::
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == d
0 <= x[i] < N[i]
Solve via depth-first Euclid's algorithm, as explained in [1]
References
----------
.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
a System of Linear Diophantine Equations with Bounded Variables''.
Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
182-192 (2006). doi:10.1007/11792086_14
"""
assert all(n > 0 for n in N)
assert all(a > 0 for a in A)
assert len(A) == len(N)
if len(A) == 1:
if N[0] > 0:
return (0,)
else:
return None
elif len(A) == 0:
return None
L = [0]*len(A)
U = [n - 1 for n in N]
Ap, Lp, Up, Gamma, Epsilon = _ramachandran_pre(A, L, U, d)
res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
d, len(A)-1, count)
if res is not None:
# check solution
assert sum(a*x for a, x in zip(A, res)) == d, (A, N, d, res)
assert all(0 <= x < n for n, x in zip(N, res)), (A, N, d, res)
return res
def _ramachandran_pre(A, L, U, d):
"""
Reduce problem to
"""
a_gcd, gamma, epsilon = euclid(A[0], A[1])
c1 = A[0]//a_gcd
c2 = A[1]//a_gcd
Ap = [0]*(len(A)-1)
Gamma = [0]*(len(A)-1)
Epsilon = [0]*(len(A)-1)
Lp = [0]*(len(A)-2)
Up = [0]*(len(A)-2)
Ap[0] = a_gcd
Gamma[0] = gamma
Epsilon[0] = epsilon
if len(A) > 2:
Lp[0] = L[0]*c1 + L[1]*c2
Up[0] = U[0]*c1 + U[1]*c2
for j in xrange(2, len(A)):
a_gcd, gamma, epsilon = euclid(Ap[j-2], A[j])
c1 = Ap[j-2]//a_gcd
c2 = A[j]//a_gcd
Ap[j-1] = a_gcd
Gamma[j-1] = gamma
Epsilon[j-1] = epsilon
if j < len(A) - 1:
Lp[j-1] = c1 * Lp[j-2] + c2 * L[j]
Up[j-1] = c1 * Up[j-2] + c2 * U[j]
return Ap, Lp, Up, Gamma, Epsilon
def _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, b, v, count):
"""
Algorithm in [1]
"""
assert v >= 1
if count is not None:
count[0] += 1
ap = Ap[v-1]
gamma = Gamma[v-1]
epsilon = Epsilon[v-1]
if v == 1:
a1 = A[0]
l1 = L[0]
u1 = U[0]
else:
a1 = Ap[v-2]
l1 = Lp[v-2]
u1 = Up[v-2]
a2 = A[v]
l2 = L[v]
u2 = U[v]
# Generate set of allowed solutions
c, r = divmod(b, ap)
if r != 0:
return None
x1 = gamma * c
x2 = epsilon * c
c1 = a2//ap
c2 = a1//ap
t_l = max(_ceildiv((l1 - x1), c1), _ceildiv((x2 - u2), c2))
t_u = min((u1 - x1)//c1, (x2 - l2)//c2)
if v == 1:
if t_u >= t_l:
x1p = x1 + c1*t_l
x2p = x2 - c2*t_l
return (x1p, x2p)
else:
return None
for t in xrange(t_l, t_u + 1):
x1p = x1 + c1*t
x2p = x2 - c2*t
res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
b - a2*x2p, v - 1, count)
if res is not None:
return res + (x2p,)
return None
from numpy.distutils.core import Extension, setup
from Cython.Build import cythonize
extensions = [
Extension('_slds', sources=['_slds.pyx', 'slds.c'])
]
setup(
name='ramachandran',
ext_modules=cythonize(extensions),
)
/*
Solving a bounded Diophantine equation with positive coefficients.
Copyright (c) 2015 Pauli Virtanen
All rights reserved.
Licensed under 3-clause BSD license, see LICENSE.txt.
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "slds.h"
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
/* Integer addition with overflow checking */
static slds_int_t
safe_add(slds_int_t a, slds_int_t b, char *overflow_flag)
{
if (a > 0 && b > SLDS_INT_MAX - a) {
*overflow_flag = 1;
}
else if (a < 0 && b < SLDS_INT_MIN - a) {
*overflow_flag = 1;
}
return a + b;
}
/* Integer subtraction with overflow checking */
static slds_int_t
safe_sub(slds_int_t a, slds_int_t b, char *overflow_flag)
{
if (a > 0 && b < a - SLDS_INT_MAX) {
*overflow_flag = 1;
}
else if (a < 0 && b > a - SLDS_INT_MIN) {
*overflow_flag = 1;
}
return a - b;
}
/* Integer multiplication with overflow checking */
static slds_int_t
safe_mul(slds_int_t a, slds_int_t b, char *overflow_flag)
{
if (a > 0) {
if (b > SLDS_INT_MAX / a || b < SLDS_INT_MIN / a) {
*overflow_flag = 1;
}
}
else if (a < 0) {
if (b > 0 && a < SLDS_INT_MIN / b) {
*overflow_flag = 1;
}
else if (b < 0 && a < SLDS_INT_MAX / b) {
*overflow_flag = 1;
}
}
return a * b;
}
/* Integer division with overflow checking */
static slds_int_t
safe_div(slds_int_t a, slds_int_t b, char *overflow_flag)
{
if (b == 0 || (b == -1 && a <= SLDS_INT_MIN)) {
*overflow_flag = 1;
}
return a / b;
}
/* Divide and round down (positive divisor; no overflows) */
static slds_int_t
floordiv(slds_int_t a, slds_int_t b)
{
assert(b > 0);
/* C division truncates */
if (a > 0) {
return a / b;
}
else {
slds_int_t v, r;
v = a / b;
r = a % b;
if (r != 0) {
--v; /* cannot overflow */
}
return v;
}
}
/* Divide and round up (positive divisor; no overflows) */
static slds_int_t
ceildiv(slds_int_t a, slds_int_t b)
{
assert(b > 0);
if (a < 0) {
return a / b;
}
else {
slds_int_t v, r;
v = a / b;
r = a % b;
if (r != 0) {
++v; /* cannot overflow */
}
return v;
}
}
/*
* Euclid's algorithm for GCD.
*
* Solves for gamma*a1 + epsilon*a2 == gcd(a1, a2)
* providing |gamma| < |a2|/gcd, |epsilon| < |a1|/gcd.
*/
static void
euclid(slds_int_t a1, slds_int_t a2, slds_int_t *a_gcd, slds_int_t *gamma, slds_int_t *epsilon)
{
slds_int_t gamma1, gamma2, epsilon1, epsilon2, r;
assert(a1 > 0);
assert(a2 > 0);
gamma1 = 1;
gamma2 = 0;
epsilon1 = 0;
epsilon2 = 1;
/* The numbers remain bounded by |a1|, |a2| during
the iteration, so no integer overflows */
while (1) {
if (a2 > 0) {
r = a1/a2;
a1 -= r*a2;
gamma1 -= r*gamma2;
epsilon1 -= r*epsilon2;
}
else {
*a_gcd = a1;
*gamma = gamma1;
*epsilon = epsilon1;
break;
}
if (a1 > 0) {
r = a2/a1;
a2 -= r*a1;
gamma2 -= r*gamma1;
epsilon2 -= r*epsilon1;
}
else {
*a_gcd = a2;
*gamma = gamma2;
*epsilon = epsilon2;
break;
}
}
}
/*
* Precompute GCD and bounds transformations
*/
static int
slds_precompute(unsigned int n,
slds_entry_t *E,
slds_entry_t *Ep,
slds_int_t *Gamma, slds_int_t *Epsilon)
{
slds_int_t a_gcd, gamma, epsilon, c1, c2;
unsigned int j;
char overflow = 0;
assert(n >= 2);
euclid(E[0].a, E[1].a, &a_gcd, &gamma, &epsilon);
Ep[0].a = a_gcd;
Gamma[0] = gamma;
Epsilon[0] = epsilon;
if (n > 2) {
c1 = E[0].a / a_gcd;
c2 = E[1].a / a_gcd;
/* Ep[0].lb = E[0].lb * c1 + E[1].lb * c2; */
Ep[0].lb = safe_add(safe_mul(E[0].lb, c1, &overflow),
safe_mul(E[1].lb, c2, &overflow), &overflow);
/* Ep[0].ub = E[0].ub * c1 + E[1].ub * c2; */
Ep[0].ub = safe_add(safe_mul(E[0].ub, c1, &overflow),
safe_mul(E[1].ub, c2, &overflow), &overflow);
if (overflow) {
return 1;
}
}
for (j = 2; j < n; ++j) {
euclid(Ep[j-2].a, E[j].a, &a_gcd, &gamma, &epsilon);
Ep[j-1].a = a_gcd;
Gamma[j-1] = gamma;
Epsilon[j-1] = epsilon;
if (j < n - 1) {
c1 = Ep[j-2].a / a_gcd;
c2 = E[j].a / a_gcd;
/* Ep[j-1].lb = c1 * Ep[j-2].lb + c2 * E[j].lb; */
Ep[j-1].lb = safe_add(safe_mul(c1, Ep[j-2].lb, &overflow),
safe_mul(c2, E[j].lb, &overflow), &overflow);
/* Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub; */
Ep[j-1].ub = safe_add(safe_mul(c1, Ep[j-2].ub, &overflow),
safe_mul(c2, E[j].ub, &overflow), &overflow);
if (overflow) {
return 1;
}
}
}
return 0;
}
/*
* Depth-first bounded Euclid search
*/
static slds_status_t
slds_dfs(unsigned int v,
slds_entry_t *E,
slds_entry_t *Ep,
slds_int_t *Gamma, slds_int_t *Epsilon,
slds_int_t b,
unsigned long max_work,
slds_int_t *x,
unsigned long *count)
{
slds_int_t a_gcd, gamma, epsilon, a1, l1, u1, a2, l2, u2, c, r, x1, x2, c1, c2, t_l, t_u, t, b2;
slds_status_t res;
char overflow = 0;
if (max_work > 0 && *count >= max_work) {
return TOO_HARD;
}
++*count;
/* Fetch precomputed values for the reduced problem */
if (v == 1) {
a1 = E[0].a;
l1 = E[0].lb;
u1 = E[0].ub;
}
else {
a1 = Ep[v-2].a;
l1 = Ep[v-2].lb;
u1 = Ep[v-2].ub;
}
a2 = E[v].a;
l2 = E[v].lb;
u2 = E[v].ub;
a_gcd = Ep[v-1].a;
gamma = Gamma[v-1];
epsilon = Epsilon[v-1];
/* Generate set of allowed solutions */
c = b / a_gcd;
r = b % a_gcd;
if (r != 0) {
return INFEASIBLE;
}
x1 = safe_mul(gamma, c, &overflow);
x2 = safe_mul(epsilon, c, &overflow);
c1 = a2 / a_gcd;
c2 = a1 / a_gcd;
t_l = MAX(ceildiv(safe_sub(l1, x1, &overflow), c1),
ceildiv(safe_sub(x2, u2, &overflow), c2));
t_u = MIN(floordiv(safe_sub(u1, x1, &overflow), c1),
floordiv(safe_sub(x2, l2, &overflow), c2));
if (overflow) {
return INTEGER_OVERFLOW;
}
/* The bounds t_l, t_u ensure the x computed below do not overflow */
if (v == 1) {
/* Base case */
if (t_u >= t_l) {
x[0] = x1 + c1*t_l;
x[1] = x2 - c2*t_l;
return FEASIBLE;
}
return INFEASIBLE;
}
else {
/* Recurse to all candidates */
for (t = t_l; t <= t_u; ++t) {
x[v] = x2 - c2*t;
/* b2 = b - a2*x[v]; */
b2 = safe_sub(b, safe_mul(a2, x[v], &overflow), &overflow);
if (overflow) {
return INTEGER_OVERFLOW;
}
res = slds_dfs(v-1, E, Ep, Gamma, Epsilon,
b2, max_work, x, count);
if (res != INFEASIBLE) {
return res;
}
}
return INFEASIBLE;
}
}
/*
* Solve bounded Diophantine equation
*
* The problem considered is::
*
* A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
* L[i] <= x[i] <= U[i]
* A[i] > 0
*
* Solve via depth-first Euclid's algorithm, as explained in [1]
*
* References
* ----------
* .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
* a System of Linear Diophantine Equations with Bounded Variables''.
* Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
* 182-192 (2006). doi:10.1007/11792086_14
*/
slds_status_t
slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x)
{
unsigned int j;
for (j = 0; j < n; ++j) {
if (E[j].a <= 0) {
return ERROR;
}
}
if (n <= 0) {
return INFEASIBLE;
}
else if (n == 1) {
if (b % E[0].a == 0) {
x[0] = b / E[0].a;
if (x[0] >= E[0].lb && x[0] <= E[0].ub) {
return FEASIBLE;
}
}
return INFEASIBLE;
}
else {
slds_entry_t Ep[n];
slds_int_t Epsilon[n], Gamma[n];
unsigned long count = 0;
if (slds_precompute(n, E, Ep, Gamma, Epsilon)) {
return INTEGER_OVERFLOW;
}
return slds_dfs(n-1, E, Ep, Gamma, Epsilon, b, max_work, x, &count);
}
}
static int
slds_sort_cmp(const void *xp, const void *yp)
{
return ((slds_entry_t*)yp)->a - ((slds_entry_t*)xp)->a;
}
/*
* Sort big coefficients first, apparently typically reduces search space
*/
void
slds_sort(unsigned int n, slds_entry_t *E)
{
qsort(E, n, sizeof(slds_entry_t), slds_sort_cmp);
}
#ifdef TEST
#include <limits.h>
int main()
{
#define n 2
slds_entry_t E[n] = {
{2, -1, SHRT_MAX},
{3, -1, SHRT_MAX}
};
slds_int_t x[n];
slds_int_t b = SHRT_MAX-7;
unsigned long max_work = 0;
slds_int_t sum;
int j;
slds_status_t res;
clock_t start, end;
unsigned long runcount = 0;
slds_int_t gcd, gamma, epsilon;
slds_sort(n, E);
start = clock();
while (1) {
res = slds(n, E, b, max_work, x);
end = clock();
runcount += 1;
if (end - start > 0.5*CLOCKS_PER_SEC) {
break;
}
}
printf("timing: %g us per run\n", (end - start)*1e6/CLOCKS_PER_SEC/runcount);
if (res == FEASIBLE) {
printf("problem is feasible:\n\n");
sum = 0;
printf("x = ");
for (j = 0; j < n; ++j) {
if (j > 0) {
printf(", ");
}
printf("%ld", (long)x[j]);
}
printf("\n");
for (j = 0; j < n; ++j) {
if (j > 0) {
printf(" + ");
}
printf("%ld*%ld", (long)E[j].a, (long)x[j]);
sum += E[j].a*x[j];
}
printf(" = %ld = %ld", (long)sum, (long)b);
if (sum == b) {
printf(" OK\n");
}
else {
printf(" FAIL\n");
}
for (j = 0; j < n; ++j) {
printf("%ld <= %ld <= %ld", (long)E[j].lb, (long)x[j], (long)E[j].ub);
if (E[j].lb <= x[j] && x[j] <= E[j].ub) {
printf(" OK\n");
}
else {
printf(" FAIL\n");
}
}
return 0;
}
else if (res == INFEASIBLE) {
printf("infeasible\n");
}
else if (res == ERROR) {
printf("error\n");
}
else if (res == TOO_HARD) {
printf("too much work done\n");
}
else if (res == INTEGER_OVERFLOW) {
printf("integer overflow\n");
}
return 1;
}
#endif
#ifndef SLDS_H_
#define SLDS_H_
#include <limits.h>
typedef long slds_int_t;
#define SLDS_INT_MAX LONG_MAX
#define SLDS_INT_MIN LONG_MIN
typedef enum {
FEASIBLE = 0,
INFEASIBLE,
TOO_HARD,
INTEGER_OVERFLOW,
ERROR
} slds_status_t;
typedef struct {
slds_int_t a;
slds_int_t lb;
slds_int_t ub;
unsigned int j;
} slds_entry_t;
slds_status_t slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x);
void slds_sort(unsigned int n, slds_entry_t *E);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment