Last active
August 29, 2015 14:26
-
-
Save pv/fe407dee3170a7e2243a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*~ | |
*.so | |
*.pyc | |
*.pyxbldc | |
/_slds.c | |
/build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cimport libc.stdlib | |
cimport cython | |
cimport numpy as np | |
cdef extern from "slds.h": | |
ctypedef long slds_int_t | |
ctypedef struct slds_entry_t: | |
slds_int_t a | |
slds_int_t lb | |
slds_int_t ub | |
unsigned int j | |
ctypedef enum slds_status_t: | |
FEASIBLE | |
INFEASIBLE | |
TOO_HARD | |
INTEGER_OVERFLOW | |
ERROR | |
slds_status_t _slds "slds" (unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x) | |
void slds_sort(unsigned int n, slds_entry_t *E) | |
class TooHardException(Exception): | |
pass | |
@cython.embedsignature(True) | |
def slds(A, L, U, b, max_work=None): | |
""" | |
Solve bounded Diophantine equation | |
The problem considered is:: | |
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b | |
L[i] <= x[i] <= U[i] | |
A[i] > 0 | |
Solve via depth-first Euclid's algorithm, as explained in [1] | |
Parameters | |
---------- | |
A, L, U : list of int | |
Problem description, lists of integers of equal length | |
b : int | |
Right-hand side | |
max_work : int, optional | |
Maximum number of cases to consider. | |
Default: no upper limit. | |
Returns | |
------- | |
x : list of int or None | |
List of integers describing one solution, or None if the problem | |
is infeasible. | |
Raises | |
------ | |
TooHardException | |
If max_work is given, raised when solution requires too much work. | |
OverflowError | |
If integer overflow prevented completing the calculation. | |
ValueError | |
If invalid input parameters | |
References | |
---------- | |
.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving | |
a System of Linear Diophantine Equations with Bounded Variables''. | |
Algorithmic Number Theory, Lecture Notes in Computer Science **4076**, | |
182-192 (2006). doi:10.1007/11792086_14 | |
""" | |
cdef ssize_t j, n | |
cdef slds_entry_t *E | |
cdef slds_int_t d | |
cdef slds_int_t *x | |
cdef slds_status_t res | |
if max_work is None: | |
max_work = 0 | |
n = len(A) | |
if n < 0 or len(L) != n or len(U) != n: | |
raise ValueError("Invalid lengths of A, L, U") | |
E = <slds_entry_t*>libc.stdlib.calloc(n, sizeof(slds_entry_t)) | |
x = <slds_int_t*>libc.stdlib.calloc(n, sizeof(slds_int_t)) | |
try: | |
if E == NULL or x == NULL: | |
raise MemoryError() | |
for j in range(n): | |
E[j].a = A[j] | |
E[j].lb = L[j] | |
E[j].ub = U[j] | |
E[j].j = j | |
slds_sort(n, E) | |
res = _slds(n, E, b, max_work, x) | |
if res == FEASIBLE: | |
x_list = [0]*n | |
for j in range(n): | |
x_list[E[j].j] = x[j] | |
return x_list | |
elif res == INFEASIBLE: | |
return None | |
elif res == TOO_HARD: | |
raise TooHardException() | |
elif res == INTEGER_OVERFLOW: | |
raise OverflowError() | |
else: | |
raise ValueError("Invalid inputs") | |
finally: | |
libc.stdlib.free(E) | |
libc.stdlib.free(x) | |
def may_share_memory(np.ndarray a_arr, np.ndarray b_arr, max_work=None): | |
base_a = int(<Py_ssize_t>a_arr.data) | |
base_b = int(<Py_ssize_t>b_arr.data) | |
itemsize_a = int(a_arr.dtype.itemsize) | |
itemsize_b = int(b_arr.dtype.itemsize) | |
shape_a = [a_arr.shape[j] for j in range(a_arr.ndim)] + [itemsize_a] | |
shape_b = [b_arr.shape[j] for j in range(b_arr.ndim)] + [itemsize_b] | |
stride_a = [a_arr.strides[j] for j in range(a_arr.ndim)] + [1] | |
stride_b = [b_arr.strides[j] for j in range(b_arr.ndim)] + [1] | |
# Ensure non-empty | |
if min(shape_a) == 0 or min(shape_b) == 0: | |
return False | |
# Drop zero strides and unit dimensions | |
shape_a2 = [] | |
stride_a2 = [] | |
for n, s in zip(shape_a, stride_a): | |
if n != 1 and s != 0: | |
shape_a2.append(n) | |
stride_a2.append(s) | |
shape_a = shape_a2 | |
stride_a = stride_a2 | |
shape_b2 = [] | |
stride_b2 = [] | |
for n, s in zip(shape_b, stride_b): | |
if n != 1 and s != 0: | |
shape_b2.append(n) | |
stride_b2.append(s) | |
shape_b = shape_b2 | |
stride_b = stride_b2 | |
# Both are scalars | |
if len(shape_a) == 0 and len(shape_b) == 0: | |
return base_a == base_b | |
# Make a strides positive | |
for j, s in enumerate(stride_a): | |
if s < 0: | |
stride_a[j] = -s | |
base_a += (shape_a[j] - 1) * s | |
# Make b strides negative | |
for j, s in enumerate(stride_b): | |
if s > 0: | |
stride_b[j] = -s | |
base_b += (shape_b[j] - 1) * s | |
# Diophantine form | |
A = stride_a + [-s for s in stride_b] | |
N = shape_a + shape_b | |
d = base_b - base_a | |
# Sort and combine equal coefficients | |
A2 = [] | |
N2 = [] | |
last_a = 0 | |
for a, n in sorted(zip(A, N)): | |
if a == last_a: | |
N2[-1] += n - 1 | |
else: | |
last_a = a | |
A2.append(a) | |
N2.append(n) | |
A = A2 | |
N = N2 | |
# Solve | |
L = [0]*len(A) | |
U = [n-1 for n in N] | |
try: | |
return slds(A, L, U, d, max_work) is not None | |
except TooHardException: | |
return True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*-python-*- | |
import os | |
def make_ext(modname, pyxfilename): | |
from distutils.extension import Extension | |
pth = os.path.abspath(os.path.dirname(pyxfilename)) | |
return Extension(name='_slds', | |
sources=[pyxfilename, os.path.join(pth, 'slds.c')], | |
include_dirs=[pth]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, absolute_import, print_function | |
import itertools | |
import random | |
import timeit | |
import time | |
from fractions import gcd | |
from six.moves import xrange | |
# XXX: just a quick try, didn't yet check if it's fully correct | |
def maybe_feasible(A, N, d, f): | |
""" | |
Decide whether a bounded Diophantine equation might have a solution. | |
The problem considered is:: | |
Given integers {A}, {N}, d, f, with 0 < A[0] < A[1] < ... < A[n-1] | |
and N[i] > 0, are there integers x[0], ..., x[n-1] such that | |
d <= A[0]*x[0] + ... + A[n-1]*x[n-1] <= f | |
0 <= x[i] < N[i] | |
Parameters | |
---------- | |
A : list of int | |
N : list of int | |
d : int | |
Returns | |
------- | |
maybe_feasible : bool | |
If True, the problem might or might not have solution. | |
If False, the problem does not have solution. | |
""" | |
assert A[0] > 0 | |
assert all(a < b for a, b in zip(A[:-1], A[1:])) | |
assert all(n > 0 for n in N) | |
# Simple bounds checks | |
if f < 0: | |
return False | |
if sum(a*(n-1) for a, n in zip(A, N)) < d: | |
return False | |
if d <= 0: | |
return True | |
# Check gcd | |
A_gcd = A[0] | |
for a in A[1:]: | |
A_gcd = gcd(a, A_gcd) | |
if not any_divides(d, f, A_gcd): | |
return False | |
# Consider feasibility of remaining range | |
if len(A) > 1: | |
return maybe_feasible(A[1:], N[1:], d - A[0]*(N[0]-1), f) | |
else: | |
return True | |
def any_divides(a, b, x): | |
""" | |
any([y % x == 0 for y in range(a, b+1)]) | |
""" | |
assert x > 0 and b >= a and a >= 0 | |
r = a % x | |
if r == 0: | |
return True | |
return (b - a + r) >= x | |
def dfs_feasible(A, N, d, count=None): | |
""" | |
Same as maybe_feasible but solve exactly (with f = d) | |
and return found solution. | |
""" | |
assert all(n > 0 for n in N) | |
assert all(a > 0 for a in A) | |
if count is not None: | |
count[0] += 1 | |
if d < 0: | |
return None | |
if len(A) == 1: | |
k, r = divmod(d, A[0]) | |
if r != 0 or k >= N[0]: | |
return None | |
return (k,) | |
else: | |
# Check gcd | |
A_gcd = A[0] | |
for a in A[1:]: | |
A_gcd = gcd(a, A_gcd) | |
if d % A_gcd != 0: | |
return None | |
# Iterate over candidates | |
j_max = min(d // A[-1], N[-1] - 1) | |
j_min = max(0, (d - sum(a*(n-1) for a, n in zip(A[:-1], N[:-1]))) // A[-1]) | |
for j in xrange(j_max, j_min-1, -1): | |
v = dfs_feasible(A[:-1], N[:-1], d - j*A[-1], count) | |
if v: | |
return v + (j,) | |
return None | |
def may_share_memory(base_a, shape_a, stride_a, | |
base_b, shape_b, stride_b): | |
shape_a = list(shape_a) | |
shape_b = list(shape_b) | |
stride_a = list(stride_a) | |
stride_b = list(stride_b) | |
# Ensure non-empty | |
if min(shape_a) == 0 or min(shape_b) == 0: | |
return False | |
# Drop zero strides and unit dimensions | |
shape_a2 = [] | |
stride_a2 = [] | |
for n, s in zip(shape_a, stride_a): | |
if n != 1 and s != 0: | |
shape_a2.append(n) | |
stride_a2.append(s) | |
shape_a = shape_a2 | |
stride_a = stride_a2 | |
shape_b2 = [] | |
stride_b2 = [] | |
for n, s in zip(shape_b, stride_b): | |
if n != 1 and s != 0: | |
shape_b2.append(n) | |
stride_b2.append(s) | |
shape_b = shape_b2 | |
stride_b = stride_b2 | |
# Both are scalars | |
if len(shape_a) == 0 and len(shape_b) == 0: | |
return base_a == base_b | |
# Make a strides positive | |
for j, s in enumerate(stride_a): | |
if s < 0: | |
stride_a[j] = -s | |
base_a += (shape_a[j] - 1) * s | |
# Make b strides negative | |
for j, s in enumerate(stride_b): | |
if s > 0: | |
stride_b[j] = -s | |
base_b += (shape_b[j] - 1) * s | |
# Diophantine form | |
A = stride_a + [-s for s in stride_b] | |
N = shape_a + shape_b | |
d = base_b - base_a | |
# Sort and combine equal coefficients | |
A2 = [] | |
N2 = [] | |
last_a = 0 | |
for a, n in sorted(zip(A, N)): | |
if a == last_a: | |
N2[-1] += n - 1 | |
else: | |
last_a = a | |
A2.append(a) | |
N2.append(n) | |
A = A2 | |
N = N2 | |
# Heuristics | |
maybe = maybe_feasible(A, N, d, d) | |
# Exact (brute-force, for checking only) | |
exact = any(sum(v) == d for v in itertools.product(*(range(0, a*n, a) for a, n in zip(A, N)))) | |
if not maybe: | |
assert not exact | |
dfs = dfs_feasible(A, N, d) | |
assert bool(dfs) == exact, (dfs, exact) | |
assert dfs is None or sum(a*x for a, x in zip(A, dfs)) == d | |
print(maybe, exact, dfs) | |
return maybe | |
def test(): | |
import numpy as np | |
x = np.zeros((5, 5, 4), dtype=np.int8) | |
a = x[:,:3,:] | |
b = x[:,3:,:] | |
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides, | |
b.__array_interface__['data'][0], b.shape, b.strides) | |
b = b.reshape(5, 4, 2).T | |
assert b.base is x | |
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides, | |
b.__array_interface__['data'][0], b.shape, b.strides) | |
b = b[1:,1:,1:] | |
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides, | |
b.__array_interface__['data'][0], b.shape, b.strides) | |
b = x[:,2:,:] | |
assert may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides, | |
b.__array_interface__['data'][0], b.shape, b.strides) | |
b = b.copy() | |
assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides, | |
b.__array_interface__['data'][0], b.shape, b.strides) | |
def test_dfs(): | |
# Try to get some idea of worst case search depth | |
A = sorted(set(random.randint(990, 1100) for j in xrange(3))) | |
N = [10000//a for a in A] | |
print("A =", A, "N = ", N) | |
d_ub = sum(a*(n-1) for a, n in zip(A, N)) | |
print("d_ub =", d_ub) | |
c_max = 0 | |
d_max = None | |
x_max = None | |
for d in xrange(2*d_ub): | |
count = [0] | |
x = dfs_feasible(A, N, d, count) | |
maybe = maybe_feasible(A, N, d, d) | |
if not maybe: | |
assert x is None | |
if count[0] > c_max: | |
c_max = count[0] | |
d_max = d | |
x_max = x | |
print("count = {}, d = {}, x = {}".format(c_max, d_max, x_max)) | |
def test_any_divides(): | |
for a in xrange(0, 17): | |
for b in xrange(a, a + 31): | |
for x in xrange(1, 17): | |
assert any_divides(a, b, x) == any([y % x == 0 for y in xrange(a, b+1)]) | |
def euclid(a1, a2): | |
""" | |
Euclid's algorithm | |
Returns | |
------- | |
a_gcd, gamma, epsilon | |
GCD and numbers such that ``gamma*a1 + epsilon*a2 == a_gcd``. | |
Solutions to ``a1*x1 + a2*x2 == b`` are generated by | |
``(gamma*b//a_gcd + a2*t//a_gcd, epsilon*b//a_gcd - a1*t//a_gcd) for t in Z``. | |
""" | |
assert a1 > 0 and a2 > 0 | |
gamma = 1 | |
delta = 0 | |
epsilon = 0 | |
eta = 1 | |
while True: | |
if a2 > 0: | |
r = a1//a2 | |
a1 -= r*a2 | |
gamma -= r*delta | |
epsilon -= r*eta | |
elif a2 == 0: | |
return a1, gamma, epsilon | |
if a1 > 0: | |
r = a2//a1 | |
a2 -= r*a1 | |
delta -= r*gamma | |
eta -= r*epsilon | |
elif a1 == 0: | |
return a2, delta, eta | |
def _ceildiv(a, b): | |
v, r = divmod(a, b) | |
if r != 0: | |
v += 1 | |
return v | |
def test_ramachandran(): | |
A_ub = 12345 | |
N_ub = 12345 | |
for n in xrange(2, 64): | |
print("n =", n) | |
max_count = 0 | |
max_count_args = None | |
for k in xrange(10): | |
A = sorted([random.randint(1, A_ub) for j in xrange(n)]) | |
N = [random.randint(1, N_ub) for j in xrange(n)] | |
d_ub = sum(a*(n-1) for a, n in zip(A, N)) | |
for j in range(1000): | |
d = random.randint(0, d_ub + 2) | |
count = [0] | |
r = ramachandran(A, N, d, count) | |
import _slds | |
r2 = _slds.slds(list(A), [0]*len(A), [qq-1 for qq in N], d) | |
assert (r is None and r2 is None) or tuple(r) == tuple(r2), (r, r2) | |
if r is None: | |
assert dfs_feasible(A, N, d) is None, (A, N, d, dfs_feasible(A, N, d)) | |
if count[0] > max_count: | |
max_count = count[0] | |
max_count_args = (A, N, d) | |
# Time it | |
timing = min(timeit.repeat(lambda: ramachandran(*max_count_args), repeat=3, number=1)) | |
timing_dfs = min(timeit.repeat(lambda: dfs_feasible(*max_count_args), repeat=3, number=1)) | |
print("worst case: {0}, timing: {1}s [dfs_feasible: {2}s)".format(max_count_args, timing, timing_dfs)) | |
def test_slds(): | |
import _slds | |
A_ub = 12345 | |
N_ub = 12345 | |
for n in xrange(1, 64): | |
print("n =", n) | |
max_time = 0 | |
max_time_args = None | |
for k in xrange(1000): | |
A = [random.randint(1, A_ub) for j in xrange(n-1)] + [1] | |
U = [random.randint(0, N_ub) for j in xrange(n-1)] + [16] | |
L = [0]*n | |
d_ub = sum(a*n for a, n in zip(A, U)) | |
for j in range(1000): | |
d = random.randint(0, d_ub + 2) | |
start = time.time() | |
r = _slds.slds(A, L, U, d) | |
timing = time.time() - start | |
if r is not None: | |
assert sum(a*x for a, x in zip(A, r)) == d | |
assert all(lb <= x <= ub for lb, x, ub in zip(L, r, U)) | |
if timing > max_time: | |
max_time = timing | |
max_time_args = (A, L, U, d) | |
print("worst case: {0} ms, {1}".format(max_time*1e3, max_time_args)) | |
def ramachandran(A, N, d, count=None): | |
""" | |
Solve bounded Diophantine equation | |
The problem considered is:: | |
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == d | |
0 <= x[i] < N[i] | |
Solve via depth-first Euclid's algorithm, as explained in [1] | |
References | |
---------- | |
.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving | |
a System of Linear Diophantine Equations with Bounded Variables''. | |
Algorithmic Number Theory, Lecture Notes in Computer Science **4076**, | |
182-192 (2006). doi:10.1007/11792086_14 | |
""" | |
assert all(n > 0 for n in N) | |
assert all(a > 0 for a in A) | |
assert len(A) == len(N) | |
if len(A) == 1: | |
if N[0] > 0: | |
return (0,) | |
else: | |
return None | |
elif len(A) == 0: | |
return None | |
L = [0]*len(A) | |
U = [n - 1 for n in N] | |
Ap, Lp, Up, Gamma, Epsilon = _ramachandran_pre(A, L, U, d) | |
res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, | |
d, len(A)-1, count) | |
if res is not None: | |
# check solution | |
assert sum(a*x for a, x in zip(A, res)) == d, (A, N, d, res) | |
assert all(0 <= x < n for n, x in zip(N, res)), (A, N, d, res) | |
return res | |
def _ramachandran_pre(A, L, U, d): | |
""" | |
Reduce problem to | |
""" | |
a_gcd, gamma, epsilon = euclid(A[0], A[1]) | |
c1 = A[0]//a_gcd | |
c2 = A[1]//a_gcd | |
Ap = [0]*(len(A)-1) | |
Gamma = [0]*(len(A)-1) | |
Epsilon = [0]*(len(A)-1) | |
Lp = [0]*(len(A)-2) | |
Up = [0]*(len(A)-2) | |
Ap[0] = a_gcd | |
Gamma[0] = gamma | |
Epsilon[0] = epsilon | |
if len(A) > 2: | |
Lp[0] = L[0]*c1 + L[1]*c2 | |
Up[0] = U[0]*c1 + U[1]*c2 | |
for j in xrange(2, len(A)): | |
a_gcd, gamma, epsilon = euclid(Ap[j-2], A[j]) | |
c1 = Ap[j-2]//a_gcd | |
c2 = A[j]//a_gcd | |
Ap[j-1] = a_gcd | |
Gamma[j-1] = gamma | |
Epsilon[j-1] = epsilon | |
if j < len(A) - 1: | |
Lp[j-1] = c1 * Lp[j-2] + c2 * L[j] | |
Up[j-1] = c1 * Up[j-2] + c2 * U[j] | |
return Ap, Lp, Up, Gamma, Epsilon | |
def _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, b, v, count): | |
""" | |
Algorithm in [1] | |
""" | |
assert v >= 1 | |
if count is not None: | |
count[0] += 1 | |
ap = Ap[v-1] | |
gamma = Gamma[v-1] | |
epsilon = Epsilon[v-1] | |
if v == 1: | |
a1 = A[0] | |
l1 = L[0] | |
u1 = U[0] | |
else: | |
a1 = Ap[v-2] | |
l1 = Lp[v-2] | |
u1 = Up[v-2] | |
a2 = A[v] | |
l2 = L[v] | |
u2 = U[v] | |
# Generate set of allowed solutions | |
c, r = divmod(b, ap) | |
if r != 0: | |
return None | |
x1 = gamma * c | |
x2 = epsilon * c | |
c1 = a2//ap | |
c2 = a1//ap | |
t_l = max(_ceildiv((l1 - x1), c1), _ceildiv((x2 - u2), c2)) | |
t_u = min((u1 - x1)//c1, (x2 - l2)//c2) | |
if v == 1: | |
if t_u >= t_l: | |
x1p = x1 + c1*t_l | |
x2p = x2 - c2*t_l | |
return (x1p, x2p) | |
else: | |
return None | |
for t in xrange(t_l, t_u + 1): | |
x1p = x1 + c1*t | |
x2p = x2 - c2*t | |
res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, | |
b - a2*x2p, v - 1, count) | |
if res is not None: | |
return res + (x2p,) | |
return None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy.distutils.core import Extension, setup | |
from Cython.Build import cythonize | |
extensions = [ | |
Extension('_slds', sources=['_slds.pyx', 'slds.c']) | |
] | |
setup( | |
name='ramachandran', | |
ext_modules=cythonize(extensions), | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Solving a bounded Diophantine equation with positive coefficients. | |
Copyright (c) 2015 Pauli Virtanen | |
All rights reserved. | |
Licensed under 3-clause BSD license, see LICENSE.txt. | |
*/ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <time.h> | |
#include "slds.h" | |
#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) | |
#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) | |
/* Integer addition with overflow checking */ | |
static slds_int_t | |
safe_add(slds_int_t a, slds_int_t b, char *overflow_flag) | |
{ | |
if (a > 0 && b > SLDS_INT_MAX - a) { | |
*overflow_flag = 1; | |
} | |
else if (a < 0 && b < SLDS_INT_MIN - a) { | |
*overflow_flag = 1; | |
} | |
return a + b; | |
} | |
/* Integer subtraction with overflow checking */ | |
static slds_int_t | |
safe_sub(slds_int_t a, slds_int_t b, char *overflow_flag) | |
{ | |
if (a > 0 && b < a - SLDS_INT_MAX) { | |
*overflow_flag = 1; | |
} | |
else if (a < 0 && b > a - SLDS_INT_MIN) { | |
*overflow_flag = 1; | |
} | |
return a - b; | |
} | |
/* Integer multiplication with overflow checking */ | |
static slds_int_t | |
safe_mul(slds_int_t a, slds_int_t b, char *overflow_flag) | |
{ | |
if (a > 0) { | |
if (b > SLDS_INT_MAX / a || b < SLDS_INT_MIN / a) { | |
*overflow_flag = 1; | |
} | |
} | |
else if (a < 0) { | |
if (b > 0 && a < SLDS_INT_MIN / b) { | |
*overflow_flag = 1; | |
} | |
else if (b < 0 && a < SLDS_INT_MAX / b) { | |
*overflow_flag = 1; | |
} | |
} | |
return a * b; | |
} | |
/* Integer division with overflow checking */ | |
static slds_int_t | |
safe_div(slds_int_t a, slds_int_t b, char *overflow_flag) | |
{ | |
if (b == 0 || (b == -1 && a <= SLDS_INT_MIN)) { | |
*overflow_flag = 1; | |
} | |
return a / b; | |
} | |
/* Divide and round down (positive divisor; no overflows) */ | |
static slds_int_t | |
floordiv(slds_int_t a, slds_int_t b) | |
{ | |
assert(b > 0); | |
/* C division truncates */ | |
if (a > 0) { | |
return a / b; | |
} | |
else { | |
slds_int_t v, r; | |
v = a / b; | |
r = a % b; | |
if (r != 0) { | |
--v; /* cannot overflow */ | |
} | |
return v; | |
} | |
} | |
/* Divide and round up (positive divisor; no overflows) */ | |
static slds_int_t | |
ceildiv(slds_int_t a, slds_int_t b) | |
{ | |
assert(b > 0); | |
if (a < 0) { | |
return a / b; | |
} | |
else { | |
slds_int_t v, r; | |
v = a / b; | |
r = a % b; | |
if (r != 0) { | |
++v; /* cannot overflow */ | |
} | |
return v; | |
} | |
} | |
/* | |
* Euclid's algorithm for GCD. | |
* | |
* Solves for gamma*a1 + epsilon*a2 == gcd(a1, a2) | |
* providing |gamma| < |a2|/gcd, |epsilon| < |a1|/gcd. | |
*/ | |
static void | |
euclid(slds_int_t a1, slds_int_t a2, slds_int_t *a_gcd, slds_int_t *gamma, slds_int_t *epsilon) | |
{ | |
slds_int_t gamma1, gamma2, epsilon1, epsilon2, r; | |
assert(a1 > 0); | |
assert(a2 > 0); | |
gamma1 = 1; | |
gamma2 = 0; | |
epsilon1 = 0; | |
epsilon2 = 1; | |
/* The numbers remain bounded by |a1|, |a2| during | |
the iteration, so no integer overflows */ | |
while (1) { | |
if (a2 > 0) { | |
r = a1/a2; | |
a1 -= r*a2; | |
gamma1 -= r*gamma2; | |
epsilon1 -= r*epsilon2; | |
} | |
else { | |
*a_gcd = a1; | |
*gamma = gamma1; | |
*epsilon = epsilon1; | |
break; | |
} | |
if (a1 > 0) { | |
r = a2/a1; | |
a2 -= r*a1; | |
gamma2 -= r*gamma1; | |
epsilon2 -= r*epsilon1; | |
} | |
else { | |
*a_gcd = a2; | |
*gamma = gamma2; | |
*epsilon = epsilon2; | |
break; | |
} | |
} | |
} | |
/* | |
* Precompute GCD and bounds transformations | |
*/ | |
static int | |
slds_precompute(unsigned int n, | |
slds_entry_t *E, | |
slds_entry_t *Ep, | |
slds_int_t *Gamma, slds_int_t *Epsilon) | |
{ | |
slds_int_t a_gcd, gamma, epsilon, c1, c2; | |
unsigned int j; | |
char overflow = 0; | |
assert(n >= 2); | |
euclid(E[0].a, E[1].a, &a_gcd, &gamma, &epsilon); | |
Ep[0].a = a_gcd; | |
Gamma[0] = gamma; | |
Epsilon[0] = epsilon; | |
if (n > 2) { | |
c1 = E[0].a / a_gcd; | |
c2 = E[1].a / a_gcd; | |
/* Ep[0].lb = E[0].lb * c1 + E[1].lb * c2; */ | |
Ep[0].lb = safe_add(safe_mul(E[0].lb, c1, &overflow), | |
safe_mul(E[1].lb, c2, &overflow), &overflow); | |
/* Ep[0].ub = E[0].ub * c1 + E[1].ub * c2; */ | |
Ep[0].ub = safe_add(safe_mul(E[0].ub, c1, &overflow), | |
safe_mul(E[1].ub, c2, &overflow), &overflow); | |
if (overflow) { | |
return 1; | |
} | |
} | |
for (j = 2; j < n; ++j) { | |
euclid(Ep[j-2].a, E[j].a, &a_gcd, &gamma, &epsilon); | |
Ep[j-1].a = a_gcd; | |
Gamma[j-1] = gamma; | |
Epsilon[j-1] = epsilon; | |
if (j < n - 1) { | |
c1 = Ep[j-2].a / a_gcd; | |
c2 = E[j].a / a_gcd; | |
/* Ep[j-1].lb = c1 * Ep[j-2].lb + c2 * E[j].lb; */ | |
Ep[j-1].lb = safe_add(safe_mul(c1, Ep[j-2].lb, &overflow), | |
safe_mul(c2, E[j].lb, &overflow), &overflow); | |
/* Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub; */ | |
Ep[j-1].ub = safe_add(safe_mul(c1, Ep[j-2].ub, &overflow), | |
safe_mul(c2, E[j].ub, &overflow), &overflow); | |
if (overflow) { | |
return 1; | |
} | |
} | |
} | |
return 0; | |
} | |
/* | |
* Depth-first bounded Euclid search | |
*/ | |
static slds_status_t | |
slds_dfs(unsigned int v, | |
slds_entry_t *E, | |
slds_entry_t *Ep, | |
slds_int_t *Gamma, slds_int_t *Epsilon, | |
slds_int_t b, | |
unsigned long max_work, | |
slds_int_t *x, | |
unsigned long *count) | |
{ | |
slds_int_t a_gcd, gamma, epsilon, a1, l1, u1, a2, l2, u2, c, r, x1, x2, c1, c2, t_l, t_u, t, b2; | |
slds_status_t res; | |
char overflow = 0; | |
if (max_work > 0 && *count >= max_work) { | |
return TOO_HARD; | |
} | |
++*count; | |
/* Fetch precomputed values for the reduced problem */ | |
if (v == 1) { | |
a1 = E[0].a; | |
l1 = E[0].lb; | |
u1 = E[0].ub; | |
} | |
else { | |
a1 = Ep[v-2].a; | |
l1 = Ep[v-2].lb; | |
u1 = Ep[v-2].ub; | |
} | |
a2 = E[v].a; | |
l2 = E[v].lb; | |
u2 = E[v].ub; | |
a_gcd = Ep[v-1].a; | |
gamma = Gamma[v-1]; | |
epsilon = Epsilon[v-1]; | |
/* Generate set of allowed solutions */ | |
c = b / a_gcd; | |
r = b % a_gcd; | |
if (r != 0) { | |
return INFEASIBLE; | |
} | |
x1 = safe_mul(gamma, c, &overflow); | |
x2 = safe_mul(epsilon, c, &overflow); | |
c1 = a2 / a_gcd; | |
c2 = a1 / a_gcd; | |
t_l = MAX(ceildiv(safe_sub(l1, x1, &overflow), c1), | |
ceildiv(safe_sub(x2, u2, &overflow), c2)); | |
t_u = MIN(floordiv(safe_sub(u1, x1, &overflow), c1), | |
floordiv(safe_sub(x2, l2, &overflow), c2)); | |
if (overflow) { | |
return INTEGER_OVERFLOW; | |
} | |
/* The bounds t_l, t_u ensure the x computed below do not overflow */ | |
if (v == 1) { | |
/* Base case */ | |
if (t_u >= t_l) { | |
x[0] = x1 + c1*t_l; | |
x[1] = x2 - c2*t_l; | |
return FEASIBLE; | |
} | |
return INFEASIBLE; | |
} | |
else { | |
/* Recurse to all candidates */ | |
for (t = t_l; t <= t_u; ++t) { | |
x[v] = x2 - c2*t; | |
/* b2 = b - a2*x[v]; */ | |
b2 = safe_sub(b, safe_mul(a2, x[v], &overflow), &overflow); | |
if (overflow) { | |
return INTEGER_OVERFLOW; | |
} | |
res = slds_dfs(v-1, E, Ep, Gamma, Epsilon, | |
b2, max_work, x, count); | |
if (res != INFEASIBLE) { | |
return res; | |
} | |
} | |
return INFEASIBLE; | |
} | |
} | |
/* | |
* Solve bounded Diophantine equation | |
* | |
* The problem considered is:: | |
* | |
* A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b | |
* L[i] <= x[i] <= U[i] | |
* A[i] > 0 | |
* | |
* Solve via depth-first Euclid's algorithm, as explained in [1] | |
* | |
* References | |
* ---------- | |
* .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving | |
* a System of Linear Diophantine Equations with Bounded Variables''. | |
* Algorithmic Number Theory, Lecture Notes in Computer Science **4076**, | |
* 182-192 (2006). doi:10.1007/11792086_14 | |
*/ | |
slds_status_t | |
slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x) | |
{ | |
unsigned int j; | |
for (j = 0; j < n; ++j) { | |
if (E[j].a <= 0) { | |
return ERROR; | |
} | |
} | |
if (n <= 0) { | |
return INFEASIBLE; | |
} | |
else if (n == 1) { | |
if (b % E[0].a == 0) { | |
x[0] = b / E[0].a; | |
if (x[0] >= E[0].lb && x[0] <= E[0].ub) { | |
return FEASIBLE; | |
} | |
} | |
return INFEASIBLE; | |
} | |
else { | |
slds_entry_t Ep[n]; | |
slds_int_t Epsilon[n], Gamma[n]; | |
unsigned long count = 0; | |
if (slds_precompute(n, E, Ep, Gamma, Epsilon)) { | |
return INTEGER_OVERFLOW; | |
} | |
return slds_dfs(n-1, E, Ep, Gamma, Epsilon, b, max_work, x, &count); | |
} | |
} | |
static int | |
slds_sort_cmp(const void *xp, const void *yp) | |
{ | |
return ((slds_entry_t*)yp)->a - ((slds_entry_t*)xp)->a; | |
} | |
/* | |
* Sort big coefficients first, apparently typically reduces search space | |
*/ | |
void | |
slds_sort(unsigned int n, slds_entry_t *E) | |
{ | |
qsort(E, n, sizeof(slds_entry_t), slds_sort_cmp); | |
} | |
#ifdef TEST | |
#include <limits.h> | |
int main() | |
{ | |
#define n 2 | |
slds_entry_t E[n] = { | |
{2, -1, SHRT_MAX}, | |
{3, -1, SHRT_MAX} | |
}; | |
slds_int_t x[n]; | |
slds_int_t b = SHRT_MAX-7; | |
unsigned long max_work = 0; | |
slds_int_t sum; | |
int j; | |
slds_status_t res; | |
clock_t start, end; | |
unsigned long runcount = 0; | |
slds_int_t gcd, gamma, epsilon; | |
slds_sort(n, E); | |
start = clock(); | |
while (1) { | |
res = slds(n, E, b, max_work, x); | |
end = clock(); | |
runcount += 1; | |
if (end - start > 0.5*CLOCKS_PER_SEC) { | |
break; | |
} | |
} | |
printf("timing: %g us per run\n", (end - start)*1e6/CLOCKS_PER_SEC/runcount); | |
if (res == FEASIBLE) { | |
printf("problem is feasible:\n\n"); | |
sum = 0; | |
printf("x = "); | |
for (j = 0; j < n; ++j) { | |
if (j > 0) { | |
printf(", "); | |
} | |
printf("%ld", (long)x[j]); | |
} | |
printf("\n"); | |
for (j = 0; j < n; ++j) { | |
if (j > 0) { | |
printf(" + "); | |
} | |
printf("%ld*%ld", (long)E[j].a, (long)x[j]); | |
sum += E[j].a*x[j]; | |
} | |
printf(" = %ld = %ld", (long)sum, (long)b); | |
if (sum == b) { | |
printf(" OK\n"); | |
} | |
else { | |
printf(" FAIL\n"); | |
} | |
for (j = 0; j < n; ++j) { | |
printf("%ld <= %ld <= %ld", (long)E[j].lb, (long)x[j], (long)E[j].ub); | |
if (E[j].lb <= x[j] && x[j] <= E[j].ub) { | |
printf(" OK\n"); | |
} | |
else { | |
printf(" FAIL\n"); | |
} | |
} | |
return 0; | |
} | |
else if (res == INFEASIBLE) { | |
printf("infeasible\n"); | |
} | |
else if (res == ERROR) { | |
printf("error\n"); | |
} | |
else if (res == TOO_HARD) { | |
printf("too much work done\n"); | |
} | |
else if (res == INTEGER_OVERFLOW) { | |
printf("integer overflow\n"); | |
} | |
return 1; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef SLDS_H_ | |
#define SLDS_H_ | |
#include <limits.h> | |
typedef long slds_int_t; | |
#define SLDS_INT_MAX LONG_MAX | |
#define SLDS_INT_MIN LONG_MIN | |
typedef enum { | |
FEASIBLE = 0, | |
INFEASIBLE, | |
TOO_HARD, | |
INTEGER_OVERFLOW, | |
ERROR | |
} slds_status_t; | |
typedef struct { | |
slds_int_t a; | |
slds_int_t lb; | |
slds_int_t ub; | |
unsigned int j; | |
} slds_entry_t; | |
slds_status_t slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x); | |
void slds_sort(unsigned int n, slds_entry_t *E); | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment