pv/.gitignore

## .gitignore
*~
*.so
*.pyc
*.pyxbldc
/_slds.c
/build

## _slds.pyx
cimport libc.stdlib
cimport cython
cimport numpy as np


cdef extern from "slds.h":
    ctypedef long slds_int_t

    ctypedef struct slds_entry_t:
        slds_int_t a
        slds_int_t lb
        slds_int_t ub
        unsigned int j

    ctypedef enum slds_status_t:
        FEASIBLE
        INFEASIBLE
        TOO_HARD
        INTEGER_OVERFLOW
        ERROR

    slds_status_t _slds "slds" (unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x)
    void slds_sort(unsigned int n, slds_entry_t *E)


class TooHardException(Exception):
    pass


@cython.embedsignature(True)
def slds(A, L, U, b, max_work=None):
    """
    Solve bounded Diophantine equation

    The problem considered is::

        A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
        L[i] <= x[i] <= U[i]
        A[i] > 0

    Solve via depth-first Euclid's algorithm, as explained in [1]

    Parameters
    ----------
    A, L, U : list of int
        Problem description, lists of integers of equal length
    b : int
        Right-hand side
    max_work : int, optional
        Maximum number of cases to consider.
        Default: no upper limit.

    Returns
    -------
    x : list of int or None
        List of integers describing one solution, or None if the problem
        is infeasible.

    Raises
    ------
    TooHardException
        If max_work is given, raised when solution requires too much work.
    OverflowError
        If integer overflow prevented completing the calculation.
    ValueError
        If invalid input parameters

    References
    ----------
    .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
           a System of Linear Diophantine Equations with Bounded Variables''.
           Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
           182-192 (2006). doi:10.1007/11792086_14
    """
    cdef ssize_t j, n
    cdef slds_entry_t *E
    cdef slds_int_t d
    cdef slds_int_t *x
    cdef slds_status_t res

    if max_work is None:
        max_work = 0

    n = len(A)

    if n < 0 or len(L) != n or len(U) != n:
        raise ValueError("Invalid lengths of A, L, U")

    E = <slds_entry_t*>libc.stdlib.calloc(n, sizeof(slds_entry_t))
    x = <slds_int_t*>libc.stdlib.calloc(n, sizeof(slds_int_t))
    try:
        if E == NULL or x == NULL:
            raise MemoryError()

        for j in range(n):
            E[j].a = A[j]
            E[j].lb = L[j]
            E[j].ub = U[j]
            E[j].j = j

        slds_sort(n, E)
        res = _slds(n, E, b, max_work, x)
        if res == FEASIBLE:
            x_list = [0]*n
            for j in range(n):
                x_list[E[j].j] = x[j]
            return x_list
        elif res == INFEASIBLE:
            return None
        elif res == TOO_HARD:
            raise TooHardException()
        elif res == INTEGER_OVERFLOW:
            raise OverflowError()
        else:
            raise ValueError("Invalid inputs")
    finally:
        libc.stdlib.free(E)
        libc.stdlib.free(x)


def may_share_memory(np.ndarray a_arr, np.ndarray b_arr, max_work=None):
    base_a = int(<Py_ssize_t>a_arr.data)
    base_b = int(<Py_ssize_t>b_arr.data)

    itemsize_a = int(a_arr.dtype.itemsize)
    itemsize_b = int(b_arr.dtype.itemsize)

    shape_a = [a_arr.shape[j] for j in range(a_arr.ndim)] + [itemsize_a]
    shape_b = [b_arr.shape[j] for j in range(b_arr.ndim)] + [itemsize_b]

    stride_a = [a_arr.strides[j] for j in range(a_arr.ndim)] + [1]
    stride_b = [b_arr.strides[j] for j in range(b_arr.ndim)] + [1]

    # Ensure non-empty
    if min(shape_a) == 0 or min(shape_b) == 0:
        return False

    # Drop zero strides and unit dimensions
    shape_a2 = []
    stride_a2 = []
    for n, s in zip(shape_a, stride_a):
        if n != 1 and s != 0:
            shape_a2.append(n)
            stride_a2.append(s)
    shape_a = shape_a2
    stride_a = stride_a2

    shape_b2 = []
    stride_b2 = []
    for n, s in zip(shape_b, stride_b):
        if n != 1 and s != 0:
            shape_b2.append(n)
            stride_b2.append(s)
    shape_b = shape_b2
    stride_b = stride_b2

    # Both are scalars
    if len(shape_a) == 0 and len(shape_b) == 0:
        return base_a == base_b

    # Make a strides positive
    for j, s in enumerate(stride_a):
        if s < 0:
            stride_a[j] = -s
            base_a += (shape_a[j] - 1) * s

    # Make b strides negative
    for j, s in enumerate(stride_b):
        if s > 0:
            stride_b[j] = -s
            base_b += (shape_b[j] - 1) * s

    # Diophantine form
    A = stride_a + [-s for s in stride_b]
    N = shape_a + shape_b
    d = base_b - base_a

    # Sort and combine equal coefficients
    A2 = []
    N2 = []
    last_a = 0
    for a, n in sorted(zip(A, N)):
        if a == last_a:
            N2[-1] += n - 1
        else:
            last_a = a
            A2.append(a)
            N2.append(n)
    A = A2
    N = N2

    # Solve
    L = [0]*len(A)
    U = [n-1 for n in N]

    try:
        return slds(A, L, U, d, max_work) is not None
    except TooHardException:
        return True

## _slds.pyxbld
# -*-python-*-
import os

def make_ext(modname, pyxfilename):
    from distutils.extension import Extension
    pth = os.path.abspath(os.path.dirname(pyxfilename))
    return Extension(name='_slds',
                     sources=[pyxfilename, os.path.join(pth, 'slds.c')],
                     include_dirs=[pth])

## maybe.py
from __future__ import division, absolute_import, print_function

import itertools
import random
import timeit
import time
from fractions import gcd
from six.moves import xrange

# XXX: just a quick try, didn't yet check if it's fully correct


def maybe_feasible(A, N, d, f):
    """
    Decide whether a bounded Diophantine equation might have a solution.

    The problem considered is::

        Given integers {A}, {N}, d, f, with 0 < A[0] < A[1] < ... < A[n-1]
        and N[i] > 0, are there integers x[0], ..., x[n-1] such that

        d <= A[0]*x[0] + ... + A[n-1]*x[n-1] <= f
        0 <= x[i] < N[i]

    Parameters
    ----------
    A : list of int
    N : list of int
    d : int

    Returns
    -------
    maybe_feasible : bool
        If True, the problem might or might not have solution.
        If False, the problem does not have solution.

    """

    assert A[0] > 0
    assert all(a < b for a, b in zip(A[:-1], A[1:]))
    assert all(n > 0 for n in N)

    # Simple bounds checks
    if f < 0:
        return False

    if sum(a*(n-1) for a, n in zip(A, N)) < d:
        return False

    if d <= 0:
        return True

    # Check gcd
    A_gcd = A[0]
    for a in A[1:]:
        A_gcd = gcd(a, A_gcd)

    if not any_divides(d, f, A_gcd):
        return False

    # Consider feasibility of remaining range
    if len(A) > 1:
        return maybe_feasible(A[1:], N[1:], d - A[0]*(N[0]-1), f)
    else:
        return True


def any_divides(a, b, x):
    """
    any([y % x == 0 for y in range(a, b+1)])
    """
    assert x > 0 and b >= a and a >= 0
    r = a % x
    if r == 0:
        return True
    return (b - a + r) >= x


def dfs_feasible(A, N, d, count=None):
    """
    Same as maybe_feasible but solve exactly (with f = d)
    and return found solution.
    """

    assert all(n > 0 for n in N)
    assert all(a > 0 for a in A)

    if count is not None:
        count[0] += 1

    if d < 0:
        return None

    if len(A) == 1:
        k, r = divmod(d, A[0])
        if r != 0 or k >= N[0]:
            return None
        return (k,)
    else:
        # Check gcd
        A_gcd = A[0]
        for a in A[1:]:
            A_gcd = gcd(a, A_gcd)

        if d % A_gcd != 0:
            return None

        # Iterate over candidates
        j_max = min(d // A[-1], N[-1] - 1)
        j_min = max(0, (d - sum(a*(n-1) for a, n in zip(A[:-1], N[:-1]))) // A[-1])
        for j in xrange(j_max, j_min-1, -1):
            v = dfs_feasible(A[:-1], N[:-1], d - j*A[-1], count)
            if v:
                return v + (j,)
        return None


def may_share_memory(base_a, shape_a, stride_a,
                     base_b, shape_b, stride_b):
    shape_a = list(shape_a)
    shape_b = list(shape_b)

    stride_a = list(stride_a)
    stride_b = list(stride_b)

    # Ensure non-empty
    if min(shape_a) == 0 or min(shape_b) == 0:
        return False

    # Drop zero strides and unit dimensions
    shape_a2 = []
    stride_a2 = []
    for n, s in zip(shape_a, stride_a):
        if n != 1 and s != 0:
            shape_a2.append(n)
            stride_a2.append(s)
    shape_a = shape_a2
    stride_a = stride_a2

    shape_b2 = []
    stride_b2 = []
    for n, s in zip(shape_b, stride_b):
        if n != 1 and s != 0:
            shape_b2.append(n)
            stride_b2.append(s)
    shape_b = shape_b2
    stride_b = stride_b2

    # Both are scalars
    if len(shape_a) == 0 and len(shape_b) == 0:
        return base_a == base_b

    # Make a strides positive
    for j, s in enumerate(stride_a):
        if s < 0:
            stride_a[j] = -s
            base_a += (shape_a[j] - 1) * s

    # Make b strides negative
    for j, s in enumerate(stride_b):
        if s > 0:
            stride_b[j] = -s
            base_b += (shape_b[j] - 1) * s

    # Diophantine form
    A = stride_a + [-s for s in stride_b]
    N = shape_a + shape_b
    d = base_b - base_a

    # Sort and combine equal coefficients
    A2 = []
    N2 = []
    last_a = 0
    for a, n in sorted(zip(A, N)):
        if a == last_a:
            N2[-1] += n - 1
        else:
            last_a = a
            A2.append(a)
            N2.append(n)
    A = A2
    N = N2

    # Heuristics
    maybe = maybe_feasible(A, N, d, d)

    # Exact (brute-force, for checking only)
    exact = any(sum(v) == d for v in itertools.product(*(range(0, a*n, a) for a, n in zip(A, N))))

    if not maybe:
        assert not exact

    dfs = dfs_feasible(A, N, d)
    assert bool(dfs) == exact, (dfs, exact)
    assert dfs is None or sum(a*x for a, x in zip(A, dfs)) == d

    print(maybe, exact, dfs)
    return maybe


def test():
    import numpy as np

    x = np.zeros((5, 5, 4), dtype=np.int8)

    a = x[:,:3,:]
    b = x[:,3:,:]
    assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
                                b.__array_interface__['data'][0], b.shape, b.strides)

    b = b.reshape(5, 4, 2).T
    assert b.base is x
    assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
                                b.__array_interface__['data'][0], b.shape, b.strides)

    b = b[1:,1:,1:]
    assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
                                b.__array_interface__['data'][0], b.shape, b.strides)

    b = x[:,2:,:]
    assert may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
                            b.__array_interface__['data'][0], b.shape, b.strides)

    b = b.copy()
    assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
                                b.__array_interface__['data'][0], b.shape, b.strides)


def test_dfs():
    # Try to get some idea of worst case search depth
    A = sorted(set(random.randint(990, 1100) for j in xrange(3)))
    N = [10000//a for a in A]

    print("A =", A, "N = ", N)

    d_ub = sum(a*(n-1) for a, n in zip(A, N))
    print("d_ub =", d_ub)

    c_max = 0
    d_max = None
    x_max = None

    for d in xrange(2*d_ub):
        count = [0]
        x = dfs_feasible(A, N, d, count)

        maybe = maybe_feasible(A, N, d, d)
        if not maybe:
            assert x is None

        if count[0] > c_max:
            c_max = count[0]
            d_max = d
            x_max = x
            print("count = {}, d = {}, x = {}".format(c_max, d_max, x_max))


def test_any_divides():
    for a in xrange(0, 17):
        for b in xrange(a, a + 31):
            for x in xrange(1, 17):
                assert any_divides(a, b, x) == any([y % x == 0 for y in xrange(a, b+1)])


def euclid(a1, a2):
    """
    Euclid's algorithm

    Returns
    -------
    a_gcd, gamma, epsilon
        GCD and numbers such that ``gamma*a1 + epsilon*a2 == a_gcd``.
        Solutions to ``a1*x1 + a2*x2 == b`` are generated by
        ``(gamma*b//a_gcd + a2*t//a_gcd, epsilon*b//a_gcd - a1*t//a_gcd) for t in Z``.
    """
    assert a1 > 0 and a2 > 0

    gamma = 1
    delta = 0
    epsilon = 0
    eta = 1

    while True:
        if a2 > 0:
            r = a1//a2
            a1 -= r*a2
            gamma -= r*delta
            epsilon -= r*eta
        elif a2 == 0:
            return a1, gamma, epsilon

        if a1 > 0:
            r = a2//a1
            a2 -= r*a1
            delta -= r*gamma
            eta -= r*epsilon
        elif a1 == 0:
            return a2, delta, eta


def _ceildiv(a, b):
    v, r = divmod(a, b)
    if r != 0:
        v += 1
    return v


def test_ramachandran():
    A_ub = 12345
    N_ub = 12345

    for n in xrange(2, 64):
        print("n =", n)
        max_count = 0
        max_count_args = None

        for k in xrange(10):
            A = sorted([random.randint(1, A_ub) for j in xrange(n)])
            N = [random.randint(1, N_ub) for j in xrange(n)]

            d_ub = sum(a*(n-1) for a, n in zip(A, N))
            for j in range(1000):
                d = random.randint(0, d_ub + 2)

                count = [0]
                r = ramachandran(A, N, d, count)

                import _slds
                r2 = _slds.slds(list(A), [0]*len(A), [qq-1 for qq in N], d)
                assert (r is None and r2 is None) or tuple(r) == tuple(r2), (r, r2)

                if r is None:
                    assert dfs_feasible(A, N, d) is None, (A, N, d, dfs_feasible(A, N, d))

                if count[0] > max_count:
                    max_count = count[0]
                    max_count_args = (A, N, d)

        # Time it
        timing = min(timeit.repeat(lambda: ramachandran(*max_count_args), repeat=3, number=1))
        timing_dfs = min(timeit.repeat(lambda: dfs_feasible(*max_count_args), repeat=3, number=1))
        print("worst case: {0}, timing: {1}s  [dfs_feasible: {2}s)".format(max_count_args, timing, timing_dfs))


def test_slds():
    import _slds

    A_ub = 12345
    N_ub = 12345

    for n in xrange(1, 64):
        print("n =", n)
        max_time = 0
        max_time_args = None

        for k in xrange(1000):
            A = [random.randint(1, A_ub) for j in xrange(n-1)] + [1]
            U = [random.randint(0, N_ub) for j in xrange(n-1)] + [16]
            L = [0]*n

            d_ub = sum(a*n for a, n in zip(A, U))
            for j in range(1000):
                d = random.randint(0, d_ub + 2)

                start = time.time()
                r = _slds.slds(A, L, U, d)
                timing = time.time() - start

                if r is not None:
                    assert sum(a*x for a, x in zip(A, r)) == d
                    assert all(lb <= x <= ub for lb, x, ub in zip(L, r, U))

                if timing > max_time:
                    max_time = timing
                    max_time_args = (A, L, U, d)
                    print("worst case: {0} ms, {1}".format(max_time*1e3, max_time_args))


def ramachandran(A, N, d, count=None):
    """
    Solve bounded Diophantine equation

    The problem considered is::

        A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == d
        0 <= x[i] < N[i]

    Solve via depth-first Euclid's algorithm, as explained in [1]

    References
    ----------
    .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
           a System of Linear Diophantine Equations with Bounded Variables''.
           Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
           182-192 (2006). doi:10.1007/11792086_14

    """
    assert all(n > 0 for n in N)
    assert all(a > 0 for a in A)
    assert len(A) == len(N)

    if len(A) == 1:
        if N[0] > 0:
            return (0,)
        else:
            return None
    elif len(A) == 0:
        return None

    L = [0]*len(A)
    U = [n - 1 for n in N]

    Ap, Lp, Up, Gamma, Epsilon = _ramachandran_pre(A, L, U, d)
    res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
                            d, len(A)-1, count)
    if res is not None:
        # check solution
        assert sum(a*x for a, x in zip(A, res)) == d, (A, N, d, res)
        assert all(0 <= x < n for n, x in zip(N, res)), (A, N, d, res)

    return res


def _ramachandran_pre(A, L, U, d):
    """
    Reduce problem to
    """
    a_gcd, gamma, epsilon = euclid(A[0], A[1])
    c1 = A[0]//a_gcd
    c2 = A[1]//a_gcd

    Ap = [0]*(len(A)-1)
    Gamma = [0]*(len(A)-1)
    Epsilon = [0]*(len(A)-1)
    Lp = [0]*(len(A)-2)
    Up = [0]*(len(A)-2)

    Ap[0] = a_gcd
    Gamma[0] = gamma
    Epsilon[0] = epsilon

    if len(A) > 2:
        Lp[0] = L[0]*c1 + L[1]*c2
        Up[0] = U[0]*c1 + U[1]*c2

    for j in xrange(2, len(A)):
        a_gcd, gamma, epsilon = euclid(Ap[j-2], A[j])

        c1 = Ap[j-2]//a_gcd
        c2 = A[j]//a_gcd

        Ap[j-1] = a_gcd
        Gamma[j-1] = gamma
        Epsilon[j-1] = epsilon

        if j < len(A) - 1:
            Lp[j-1] = c1 * Lp[j-2] + c2 * L[j]
            Up[j-1] = c1 * Up[j-2] + c2 * U[j]

    return Ap, Lp, Up, Gamma, Epsilon


def _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, b, v, count):
    """
    Algorithm in [1]
    """
    assert v >= 1

    if count is not None:
        count[0] += 1

    ap = Ap[v-1]
    gamma = Gamma[v-1]
    epsilon = Epsilon[v-1]

    if v == 1:
        a1 = A[0]
        l1 = L[0]
        u1 = U[0]
    else:
        a1 = Ap[v-2]
        l1 = Lp[v-2]
        u1 = Up[v-2]

    a2 = A[v]
    l2 = L[v]
    u2 = U[v]

    # Generate set of allowed solutions
    c, r = divmod(b, ap)
    if r != 0:
        return None

    x1 = gamma * c
    x2 = epsilon * c

    c1 = a2//ap
    c2 = a1//ap

    t_l = max(_ceildiv((l1 - x1), c1), _ceildiv((x2 - u2), c2))
    t_u = min((u1 - x1)//c1, (x2 - l2)//c2)

    if v == 1:
        if t_u >= t_l:
            x1p = x1 + c1*t_l
            x2p = x2 - c2*t_l
            return (x1p, x2p)
        else:
            return None

    for t in xrange(t_l, t_u + 1):
        x1p = x1 + c1*t
        x2p = x2 - c2*t
        res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
                                b - a2*x2p, v - 1, count)
        if res is not None:
            return res + (x2p,)
    return None

## setup.py
from numpy.distutils.core import Extension, setup
from Cython.Build import cythonize

extensions = [
    Extension('_slds', sources=['_slds.pyx', 'slds.c'])
]

setup(
    name='ramachandran',
    ext_modules=cythonize(extensions),
)

## slds.c
/*
  Solving a bounded Diophantine equation with positive coefficients.

  Copyright (c) 2015 Pauli Virtanen
  All rights reserved.
  Licensed under 3-clause BSD license, see LICENSE.txt.
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>


#include "slds.h"


#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))


/* Integer addition with overflow checking */
static slds_int_t
safe_add(slds_int_t a, slds_int_t b, char *overflow_flag)
{
    if (a > 0 && b > SLDS_INT_MAX - a) {
        *overflow_flag = 1;
    }
    else if (a < 0 && b < SLDS_INT_MIN - a) {
        *overflow_flag = 1;
    }
    return a + b;
}


/* Integer subtraction with overflow checking */
static slds_int_t
safe_sub(slds_int_t a, slds_int_t b, char *overflow_flag)
{
    if (a > 0 && b < a - SLDS_INT_MAX) {
        *overflow_flag = 1;
    }
    else if (a < 0 && b > a - SLDS_INT_MIN) {
        *overflow_flag = 1;
    }
    return a - b;
}


/* Integer multiplication with overflow checking */
static slds_int_t
safe_mul(slds_int_t a, slds_int_t b, char *overflow_flag)
{
    if (a > 0) {
        if (b > SLDS_INT_MAX / a || b < SLDS_INT_MIN / a) {
            *overflow_flag = 1;
        }
    }
    else if (a < 0) {
        if (b > 0 && a < SLDS_INT_MIN / b) {
            *overflow_flag = 1;
        }
        else if (b < 0 && a < SLDS_INT_MAX / b) {
            *overflow_flag = 1;
        }
    }
    return a * b;
}


/* Integer division with overflow checking */
static slds_int_t
safe_div(slds_int_t a, slds_int_t b, char *overflow_flag)
{
    if (b == 0 || (b == -1 && a <= SLDS_INT_MIN)) {
        *overflow_flag = 1;
    }
    return a / b;
}


/* Divide and round down (positive divisor; no overflows) */
static slds_int_t
floordiv(slds_int_t a, slds_int_t b)
{
    assert(b > 0);

    /* C division truncates */
    if (a > 0) {
        return a / b;
    }
    else {
        slds_int_t v, r;
        v = a / b;
        r = a % b;
        if (r != 0) {
            --v;  /* cannot overflow */
        }
        return v;
    }
}

/* Divide and round up (positive divisor; no overflows) */
static slds_int_t
ceildiv(slds_int_t a, slds_int_t b)
{
    assert(b > 0);

    if (a < 0) {
        return a / b;
    }
    else {
        slds_int_t v, r;
        v = a / b;
        r = a % b;
        if (r != 0) {
            ++v;  /* cannot overflow */
        }
        return v;
    }
}


/*
 * Euclid's algorithm for GCD.
 *
 * Solves for gamma*a1 + epsilon*a2 == gcd(a1, a2)
 * providing |gamma| < |a2|/gcd, |epsilon| < |a1|/gcd.
 */
static void
euclid(slds_int_t a1, slds_int_t a2, slds_int_t *a_gcd, slds_int_t *gamma, slds_int_t *epsilon)
{
    slds_int_t gamma1, gamma2, epsilon1, epsilon2, r;

    assert(a1 > 0);
    assert(a2 > 0);

    gamma1 = 1;
    gamma2 = 0;
    epsilon1 = 0;
    epsilon2 = 1;

    /* The numbers remain bounded by |a1|, |a2| during
       the iteration, so no integer overflows */
    while (1) {
        if (a2 > 0) {
            r = a1/a2;
            a1 -= r*a2;
            gamma1 -= r*gamma2;
            epsilon1 -= r*epsilon2;
        }
        else {
            *a_gcd = a1;
            *gamma = gamma1;
            *epsilon = epsilon1;
            break;
        }

        if (a1 > 0) {
            r = a2/a1;
            a2 -= r*a1;
            gamma2 -= r*gamma1;
            epsilon2 -= r*epsilon1;
        }
        else {
            *a_gcd = a2;
            *gamma = gamma2;
            *epsilon = epsilon2;
            break;
        }
    }
}


/*
 * Precompute GCD and bounds transformations
 */
static int
slds_precompute(unsigned int n,
                slds_entry_t *E,
                slds_entry_t *Ep,
                slds_int_t *Gamma, slds_int_t *Epsilon)
{
    slds_int_t a_gcd, gamma, epsilon, c1, c2;
    unsigned int j;
    char overflow = 0;

    assert(n >= 2);

    euclid(E[0].a, E[1].a, &a_gcd, &gamma, &epsilon);
    Ep[0].a = a_gcd;
    Gamma[0] = gamma;
    Epsilon[0] = epsilon;

    if (n > 2) {
        c1 = E[0].a / a_gcd;
        c2 = E[1].a / a_gcd;

        /* Ep[0].lb = E[0].lb * c1 + E[1].lb * c2; */
        Ep[0].lb = safe_add(safe_mul(E[0].lb, c1, &overflow),
                            safe_mul(E[1].lb, c2, &overflow), &overflow);
        /* Ep[0].ub = E[0].ub * c1 + E[1].ub * c2; */
        Ep[0].ub = safe_add(safe_mul(E[0].ub, c1, &overflow),
                            safe_mul(E[1].ub, c2, &overflow), &overflow);
        if (overflow) {
            return 1;
        }
    }

    for (j = 2; j < n; ++j) {
        euclid(Ep[j-2].a, E[j].a, &a_gcd, &gamma, &epsilon);
        Ep[j-1].a = a_gcd;
        Gamma[j-1] = gamma;
        Epsilon[j-1] = epsilon;

        if (j < n - 1) {
            c1 = Ep[j-2].a / a_gcd;
            c2 = E[j].a / a_gcd;

            /* Ep[j-1].lb = c1 * Ep[j-2].lb + c2 * E[j].lb; */
            Ep[j-1].lb = safe_add(safe_mul(c1, Ep[j-2].lb, &overflow),
                                  safe_mul(c2, E[j].lb, &overflow), &overflow);

            /* Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub; */
            Ep[j-1].ub = safe_add(safe_mul(c1, Ep[j-2].ub, &overflow),
                                  safe_mul(c2, E[j].ub, &overflow), &overflow);

            if (overflow) {
                return 1;
            }
        }
    }

    return 0;
}


/*
 * Depth-first bounded Euclid search
 */
static slds_status_t
slds_dfs(unsigned int v,
         slds_entry_t *E,
         slds_entry_t *Ep,
         slds_int_t *Gamma, slds_int_t *Epsilon,
         slds_int_t b,
         unsigned long max_work,
         slds_int_t *x,
         unsigned long *count)
{
    slds_int_t a_gcd, gamma, epsilon, a1, l1, u1, a2, l2, u2, c, r, x1, x2, c1, c2, t_l, t_u, t, b2;
    slds_status_t res;
    char overflow = 0;

    if (max_work > 0 && *count >= max_work) {
        return TOO_HARD;
    }
    ++*count;

    /* Fetch precomputed values for the reduced problem */
    if (v == 1) {
        a1 = E[0].a;
        l1 = E[0].lb;
        u1 = E[0].ub;
    }
    else {
        a1 = Ep[v-2].a;
        l1 = Ep[v-2].lb;
        u1 = Ep[v-2].ub;
    }

    a2 = E[v].a;
    l2 = E[v].lb;
    u2 = E[v].ub;

    a_gcd = Ep[v-1].a;
    gamma = Gamma[v-1];
    epsilon = Epsilon[v-1];

    /* Generate set of allowed solutions */
    c = b / a_gcd;
    r = b % a_gcd;
    if (r != 0) {
        return INFEASIBLE;
    }

    x1 = safe_mul(gamma, c, &overflow);
    x2 = safe_mul(epsilon, c, &overflow);

    c1 = a2 / a_gcd;
    c2 = a1 / a_gcd;

    t_l = MAX(ceildiv(safe_sub(l1, x1, &overflow), c1),
              ceildiv(safe_sub(x2, u2, &overflow), c2));
    t_u = MIN(floordiv(safe_sub(u1, x1, &overflow), c1),
              floordiv(safe_sub(x2, l2, &overflow), c2));

    if (overflow) {
        return INTEGER_OVERFLOW;
    }

    /* The bounds t_l, t_u ensure the x computed below do not overflow */

    if (v == 1) {
        /* Base case */
        if (t_u >= t_l) {
            x[0] = x1 + c1*t_l;
            x[1] = x2 - c2*t_l;
            return FEASIBLE;
        }
        return INFEASIBLE;
    }
    else {
        /* Recurse to all candidates */
        for (t = t_l; t <= t_u; ++t) {
            x[v] = x2 - c2*t;

            /* b2 = b - a2*x[v]; */
            b2 = safe_sub(b, safe_mul(a2, x[v], &overflow), &overflow);
            if (overflow) {
                return INTEGER_OVERFLOW;
            }

            res = slds_dfs(v-1, E, Ep, Gamma, Epsilon,
                           b2, max_work, x, count);
            if (res != INFEASIBLE) {
                return res;
            }
        }
        return INFEASIBLE;
    }
}


/*
 * Solve bounded Diophantine equation
 *
 * The problem considered is::
 *
 *     A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
 *     L[i] <= x[i] <= U[i]
 *     A[i] > 0
 *
 * Solve via depth-first Euclid's algorithm, as explained in [1]
 *
 * References
 * ----------
 * .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
 *        a System of Linear Diophantine Equations with Bounded Variables''.
 *        Algorithmic Number Theory, Lecture Notes in Computer Science **4076**,
 *        182-192 (2006). doi:10.1007/11792086_14
 */
slds_status_t
slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x)
{
    unsigned int j;

    for (j = 0; j < n; ++j) {
        if (E[j].a <= 0) {
            return ERROR;
        }
    }

    if (n <= 0) {
        return INFEASIBLE;
    }
    else if (n == 1) {
        if (b % E[0].a == 0) {
            x[0] = b / E[0].a;
            if (x[0] >= E[0].lb && x[0] <= E[0].ub) {
                return FEASIBLE;
            }
        }
        return INFEASIBLE;
    }
    else {
        slds_entry_t Ep[n];
        slds_int_t Epsilon[n], Gamma[n];
        unsigned long count = 0;

        if (slds_precompute(n, E, Ep, Gamma, Epsilon)) {
            return INTEGER_OVERFLOW;
        }
        return slds_dfs(n-1, E, Ep, Gamma, Epsilon, b, max_work, x, &count);
    }
}


static int
slds_sort_cmp(const void *xp, const void *yp)
{
    return ((slds_entry_t*)yp)->a - ((slds_entry_t*)xp)->a;
}


/*
 * Sort big coefficients first, apparently typically reduces search space
 */
void
slds_sort(unsigned int n, slds_entry_t *E)
{
    qsort(E, n, sizeof(slds_entry_t), slds_sort_cmp);
}


#ifdef TEST

#include <limits.h>

int main()
{
    #define n 2
    slds_entry_t E[n] = {
        {2, -1, SHRT_MAX},
        {3, -1, SHRT_MAX}
    };
    slds_int_t x[n];
    slds_int_t b = SHRT_MAX-7;
    unsigned long max_work = 0;
    slds_int_t sum;
    int j;
    slds_status_t res;
    clock_t start, end;
    unsigned long runcount = 0;

    slds_int_t gcd, gamma, epsilon;

    slds_sort(n, E);

    start = clock();
    while (1) {
        res = slds(n, E, b, max_work, x);
        end = clock();
        runcount += 1;
        if (end - start > 0.5*CLOCKS_PER_SEC) {
            break;
        }
    }
    printf("timing: %g us per run\n", (end - start)*1e6/CLOCKS_PER_SEC/runcount);

    if (res == FEASIBLE) {
        printf("problem is feasible:\n\n");
        sum = 0;
        printf("x = ");
        for (j = 0; j < n; ++j) {
            if (j > 0) {
                printf(", ");
            }
            printf("%ld", (long)x[j]);
        }
        printf("\n");
        for (j = 0; j < n; ++j) {
            if (j > 0) {
                printf(" + ");
            }
            printf("%ld*%ld", (long)E[j].a, (long)x[j]);
            sum += E[j].a*x[j];
        }
        printf(" = %ld = %ld", (long)sum, (long)b);
        if (sum == b) {
            printf("  OK\n");
        }
        else {
            printf("  FAIL\n");
        }
        for (j = 0; j < n; ++j) {
            printf("%ld <= %ld <= %ld", (long)E[j].lb, (long)x[j], (long)E[j].ub);
            if (E[j].lb <= x[j] && x[j] <= E[j].ub) {
                printf("  OK\n");
            }
            else {
                printf("  FAIL\n");
            }
        }
        return 0;
    }
    else if (res == INFEASIBLE) {
        printf("infeasible\n");
    }
    else if (res == ERROR) {
        printf("error\n");
    }
    else if (res == TOO_HARD) {
        printf("too much work done\n");
    }
    else if (res == INTEGER_OVERFLOW) {
        printf("integer overflow\n");
    }
    return 1;
}


#endif

## slds.h
#ifndef SLDS_H_
#define SLDS_H_

#include <limits.h>


typedef long slds_int_t;

#define SLDS_INT_MAX LONG_MAX
#define SLDS_INT_MIN LONG_MIN


typedef enum {
    FEASIBLE = 0,
    INFEASIBLE,
    TOO_HARD,
    INTEGER_OVERFLOW,
    ERROR
} slds_status_t;


typedef struct {
    slds_int_t a;
    slds_int_t lb;
    slds_int_t ub;
    unsigned int j;
} slds_entry_t;


slds_status_t slds(unsigned int n, slds_entry_t *E, slds_int_t b, unsigned long max_work, slds_int_t *x);

void slds_sort(unsigned int n, slds_entry_t *E);

#endif
	cimport libc.stdlib
	cimport cython
	cimport numpy as np


	cdef extern from "slds.h":
	ctypedef long slds_int_t

	ctypedef struct slds_entry_t:
	slds_int_t a
	slds_int_t lb
	slds_int_t ub
	unsigned int j

	ctypedef enum slds_status_t:
	FEASIBLE
	INFEASIBLE
	TOO_HARD
	INTEGER_OVERFLOW
	ERROR

	slds_status_t _slds "slds" (unsigned int n, slds_entry_t E, slds_int_t b, unsigned long max_work, slds_int_t x)
	void slds_sort(unsigned int n, slds_entry_t *E)


	class TooHardException(Exception):
	pass


	@cython.embedsignature(True)
	def slds(A, L, U, b, max_work=None):
	"""
	Solve bounded Diophantine equation

	The problem considered is::

	A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
	L[i] <= x[i] <= U[i]
	A[i] > 0

	Solve via depth-first Euclid's algorithm, as explained in [1]

	Parameters
	----------
	A, L, U : list of int
	Problem description, lists of integers of equal length
	b : int
	Right-hand side
	max_work : int, optional
	Maximum number of cases to consider.
	Default: no upper limit.

	Returns
	-------
	x : list of int or None
	List of integers describing one solution, or None if the problem
	is infeasible.

	Raises
	------
	TooHardException
	If max_work is given, raised when solution requires too much work.
	OverflowError
	If integer overflow prevented completing the calculation.
	ValueError
	If invalid input parameters

	References
	----------
	.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
	a System of Linear Diophantine Equations with Bounded Variables''.
	Algorithmic Number Theory, Lecture Notes in Computer Science 4076,
	182-192 (2006). doi:10.1007/11792086_14
	"""
	cdef ssize_t j, n
	cdef slds_entry_t *E
	cdef slds_int_t d
	cdef slds_int_t *x
	cdef slds_status_t res

	if max_work is None:
	max_work = 0

	n = len(A)

	if n < 0 or len(L) != n or len(U) != n:
	raise ValueError("Invalid lengths of A, L, U")

	E = <slds_entry_t*>libc.stdlib.calloc(n, sizeof(slds_entry_t))
	x = <slds_int_t*>libc.stdlib.calloc(n, sizeof(slds_int_t))
	try:
	if E == NULL or x == NULL:
	raise MemoryError()

	for j in range(n):
	E[j].a = A[j]
	E[j].lb = L[j]
	E[j].ub = U[j]
	E[j].j = j

	slds_sort(n, E)
	res = _slds(n, E, b, max_work, x)
	if res == FEASIBLE:
	x_list = [0]*n
	for j in range(n):
	x_list[E[j].j] = x[j]
	return x_list
	elif res == INFEASIBLE:
	return None
	elif res == TOO_HARD:
	raise TooHardException()
	elif res == INTEGER_OVERFLOW:
	raise OverflowError()
	else:
	raise ValueError("Invalid inputs")
	finally:
	libc.stdlib.free(E)
	libc.stdlib.free(x)


	def may_share_memory(np.ndarray a_arr, np.ndarray b_arr, max_work=None):
	base_a = int(<Py_ssize_t>a_arr.data)
	base_b = int(<Py_ssize_t>b_arr.data)

	itemsize_a = int(a_arr.dtype.itemsize)
	itemsize_b = int(b_arr.dtype.itemsize)

	shape_a = [a_arr.shape[j] for j in range(a_arr.ndim)] + [itemsize_a]
	shape_b = [b_arr.shape[j] for j in range(b_arr.ndim)] + [itemsize_b]

	stride_a = [a_arr.strides[j] for j in range(a_arr.ndim)] + [1]
	stride_b = [b_arr.strides[j] for j in range(b_arr.ndim)] + [1]

	# Ensure non-empty
	if min(shape_a) == 0 or min(shape_b) == 0:
	return False

	# Drop zero strides and unit dimensions
	shape_a2 = []
	stride_a2 = []
	for n, s in zip(shape_a, stride_a):
	if n != 1 and s != 0:
	shape_a2.append(n)
	stride_a2.append(s)
	shape_a = shape_a2
	stride_a = stride_a2

	shape_b2 = []
	stride_b2 = []
	for n, s in zip(shape_b, stride_b):
	if n != 1 and s != 0:
	shape_b2.append(n)
	stride_b2.append(s)
	shape_b = shape_b2
	stride_b = stride_b2

	# Both are scalars
	if len(shape_a) == 0 and len(shape_b) == 0:
	return base_a == base_b

	# Make a strides positive
	for j, s in enumerate(stride_a):
	if s < 0:
	stride_a[j] = -s
	base_a += (shape_a[j] - 1) * s

	# Make b strides negative
	for j, s in enumerate(stride_b):
	if s > 0:
	stride_b[j] = -s
	base_b += (shape_b[j] - 1) * s

	# Diophantine form
	A = stride_a + [-s for s in stride_b]
	N = shape_a + shape_b
	d = base_b - base_a

	# Sort and combine equal coefficients
	A2 = []
	N2 = []
	last_a = 0
	for a, n in sorted(zip(A, N)):
	if a == last_a:
	N2[-1] += n - 1
	else:
	last_a = a
	A2.append(a)
	N2.append(n)
	A = A2
	N = N2

	# Solve
	L = [0]*len(A)
	U = [n-1 for n in N]

	try:
	return slds(A, L, U, d, max_work) is not None
	except TooHardException:
	return True
	# --python--
	import os

	def make_ext(modname, pyxfilename):
	from distutils.extension import Extension
	pth = os.path.abspath(os.path.dirname(pyxfilename))
	return Extension(name='_slds',
	sources=[pyxfilename, os.path.join(pth, 'slds.c')],
	include_dirs=[pth])
	from __future__ import division, absolute_import, print_function

	import itertools
	import random
	import timeit
	import time
	from fractions import gcd
	from six.moves import xrange

	# XXX: just a quick try, didn't yet check if it's fully correct


	def maybe_feasible(A, N, d, f):
	"""
	Decide whether a bounded Diophantine equation might have a solution.

	The problem considered is::

	Given integers {A}, {N}, d, f, with 0 < A[0] < A[1] < ... < A[n-1]
	and N[i] > 0, are there integers x[0], ..., x[n-1] such that

	d <= A[0]x[0] + ... + A[n-1]x[n-1] <= f
	0 <= x[i] < N[i]

	Parameters
	----------
	A : list of int
	N : list of int
	d : int

	Returns
	-------
	maybe_feasible : bool
	If True, the problem might or might not have solution.
	If False, the problem does not have solution.

	"""

	assert A[0] > 0
	assert all(a < b for a, b in zip(A[:-1], A[1:]))
	assert all(n > 0 for n in N)

	# Simple bounds checks
	if f < 0:
	return False

	if sum(a*(n-1) for a, n in zip(A, N)) < d:
	return False

	if d <= 0:
	return True

	# Check gcd
	A_gcd = A[0]
	for a in A[1:]:
	A_gcd = gcd(a, A_gcd)

	if not any_divides(d, f, A_gcd):
	return False

	# Consider feasibility of remaining range
	if len(A) > 1:
	return maybe_feasible(A[1:], N[1:], d - A[0]*(N[0]-1), f)
	else:
	return True


	def any_divides(a, b, x):
	"""
	any([y % x == 0 for y in range(a, b+1)])
	"""
	assert x > 0 and b >= a and a >= 0
	r = a % x
	if r == 0:
	return True
	return (b - a + r) >= x


	def dfs_feasible(A, N, d, count=None):
	"""
	Same as maybe_feasible but solve exactly (with f = d)
	and return found solution.
	"""

	assert all(n > 0 for n in N)
	assert all(a > 0 for a in A)

	if count is not None:
	count[0] += 1

	if d < 0:
	return None

	if len(A) == 1:
	k, r = divmod(d, A[0])
	if r != 0 or k >= N[0]:
	return None
	return (k,)
	else:
	# Check gcd
	A_gcd = A[0]
	for a in A[1:]:
	A_gcd = gcd(a, A_gcd)

	if d % A_gcd != 0:
	return None

	# Iterate over candidates
	j_max = min(d // A[-1], N[-1] - 1)
	j_min = max(0, (d - sum(a*(n-1) for a, n in zip(A[:-1], N[:-1]))) // A[-1])
	for j in xrange(j_max, j_min-1, -1):
	v = dfs_feasible(A[:-1], N[:-1], d - j*A[-1], count)
	if v:
	return v + (j,)
	return None


	def may_share_memory(base_a, shape_a, stride_a,
	base_b, shape_b, stride_b):
	shape_a = list(shape_a)
	shape_b = list(shape_b)

	stride_a = list(stride_a)
	stride_b = list(stride_b)

	# Ensure non-empty
	if min(shape_a) == 0 or min(shape_b) == 0:
	return False

	# Drop zero strides and unit dimensions
	shape_a2 = []
	stride_a2 = []
	for n, s in zip(shape_a, stride_a):
	if n != 1 and s != 0:
	shape_a2.append(n)
	stride_a2.append(s)
	shape_a = shape_a2
	stride_a = stride_a2

	shape_b2 = []
	stride_b2 = []
	for n, s in zip(shape_b, stride_b):
	if n != 1 and s != 0:
	shape_b2.append(n)
	stride_b2.append(s)
	shape_b = shape_b2
	stride_b = stride_b2

	# Both are scalars
	if len(shape_a) == 0 and len(shape_b) == 0:
	return base_a == base_b

	# Make a strides positive
	for j, s in enumerate(stride_a):
	if s < 0:
	stride_a[j] = -s
	base_a += (shape_a[j] - 1) * s

	# Make b strides negative
	for j, s in enumerate(stride_b):
	if s > 0:
	stride_b[j] = -s
	base_b += (shape_b[j] - 1) * s

	# Diophantine form
	A = stride_a + [-s for s in stride_b]
	N = shape_a + shape_b
	d = base_b - base_a

	# Sort and combine equal coefficients
	A2 = []
	N2 = []
	last_a = 0
	for a, n in sorted(zip(A, N)):
	if a == last_a:
	N2[-1] += n - 1
	else:
	last_a = a
	A2.append(a)
	N2.append(n)
	A = A2
	N = N2

	# Heuristics
	maybe = maybe_feasible(A, N, d, d)

	# Exact (brute-force, for checking only)
	exact = any(sum(v) == d for v in itertools.product((range(0, an, a) for a, n in zip(A, N))))

	if not maybe:
	assert not exact

	dfs = dfs_feasible(A, N, d)
	assert bool(dfs) == exact, (dfs, exact)
	assert dfs is None or sum(a*x for a, x in zip(A, dfs)) == d

	print(maybe, exact, dfs)
	return maybe


	def test():
	import numpy as np

	x = np.zeros((5, 5, 4), dtype=np.int8)

	a = x[:,:3,:]
	b = x[:,3:,:]
	assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
	b.__array_interface__['data'][0], b.shape, b.strides)

	b = b.reshape(5, 4, 2).T
	assert b.base is x
	assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
	b.__array_interface__['data'][0], b.shape, b.strides)

	b = b[1:,1:,1:]
	assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
	b.__array_interface__['data'][0], b.shape, b.strides)

	b = x[:,2:,:]
	assert may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
	b.__array_interface__['data'][0], b.shape, b.strides)

	b = b.copy()
	assert not may_share_memory(a.__array_interface__['data'][0], a.shape, a.strides,
	b.__array_interface__['data'][0], b.shape, b.strides)


	def test_dfs():
	# Try to get some idea of worst case search depth
	A = sorted(set(random.randint(990, 1100) for j in xrange(3)))
	N = [10000//a for a in A]

	print("A =", A, "N = ", N)

	d_ub = sum(a*(n-1) for a, n in zip(A, N))
	print("d_ub =", d_ub)

	c_max = 0
	d_max = None
	x_max = None

	for d in xrange(2*d_ub):
	count = [0]
	x = dfs_feasible(A, N, d, count)

	maybe = maybe_feasible(A, N, d, d)
	if not maybe:
	assert x is None

	if count[0] > c_max:
	c_max = count[0]
	d_max = d
	x_max = x
	print("count = {}, d = {}, x = {}".format(c_max, d_max, x_max))


	def test_any_divides():
	for a in xrange(0, 17):
	for b in xrange(a, a + 31):
	for x in xrange(1, 17):
	assert any_divides(a, b, x) == any([y % x == 0 for y in xrange(a, b+1)])


	def euclid(a1, a2):
	"""
	Euclid's algorithm

	Returns
	-------
	a_gcd, gamma, epsilon
	GCD and numbers such that ``gammaa1 + epsilona2 == a_gcd``.
	Solutions to ``a1x1 + a2x2 == b`` are generated by
	``(gammab//a_gcd + a2t//a_gcd, epsilonb//a_gcd - a1t//a_gcd) for t in Z``.
	"""
	assert a1 > 0 and a2 > 0

	gamma = 1
	delta = 0
	epsilon = 0
	eta = 1

	while True:
	if a2 > 0:
	r = a1//a2
	a1 -= r*a2
	gamma -= r*delta
	epsilon -= r*eta
	elif a2 == 0:
	return a1, gamma, epsilon

	if a1 > 0:
	r = a2//a1
	a2 -= r*a1
	delta -= r*gamma
	eta -= r*epsilon
	elif a1 == 0:
	return a2, delta, eta


	def _ceildiv(a, b):
	v, r = divmod(a, b)
	if r != 0:
	v += 1
	return v


	def test_ramachandran():
	A_ub = 12345
	N_ub = 12345

	for n in xrange(2, 64):
	print("n =", n)
	max_count = 0
	max_count_args = None

	for k in xrange(10):
	A = sorted([random.randint(1, A_ub) for j in xrange(n)])
	N = [random.randint(1, N_ub) for j in xrange(n)]

	d_ub = sum(a*(n-1) for a, n in zip(A, N))
	for j in range(1000):
	d = random.randint(0, d_ub + 2)

	count = [0]
	r = ramachandran(A, N, d, count)

	import _slds
	r2 = _slds.slds(list(A), [0]*len(A), [qq-1 for qq in N], d)
	assert (r is None and r2 is None) or tuple(r) == tuple(r2), (r, r2)

	if r is None:
	assert dfs_feasible(A, N, d) is None, (A, N, d, dfs_feasible(A, N, d))

	if count[0] > max_count:
	max_count = count[0]
	max_count_args = (A, N, d)

	# Time it
	timing = min(timeit.repeat(lambda: ramachandran(*max_count_args), repeat=3, number=1))
	timing_dfs = min(timeit.repeat(lambda: dfs_feasible(*max_count_args), repeat=3, number=1))
	print("worst case: {0}, timing: {1}s [dfs_feasible: {2}s)".format(max_count_args, timing, timing_dfs))


	def test_slds():
	import _slds

	A_ub = 12345
	N_ub = 12345

	for n in xrange(1, 64):
	print("n =", n)
	max_time = 0
	max_time_args = None

	for k in xrange(1000):
	A = [random.randint(1, A_ub) for j in xrange(n-1)] + [1]
	U = [random.randint(0, N_ub) for j in xrange(n-1)] + [16]
	L = [0]*n

	d_ub = sum(a*n for a, n in zip(A, U))
	for j in range(1000):
	d = random.randint(0, d_ub + 2)

	start = time.time()
	r = _slds.slds(A, L, U, d)
	timing = time.time() - start

	if r is not None:
	assert sum(a*x for a, x in zip(A, r)) == d
	assert all(lb <= x <= ub for lb, x, ub in zip(L, r, U))

	if timing > max_time:
	max_time = timing
	max_time_args = (A, L, U, d)
	print("worst case: {0} ms, {1}".format(max_time*1e3, max_time_args))


	def ramachandran(A, N, d, count=None):
	"""
	Solve bounded Diophantine equation

	The problem considered is::

	A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == d
	0 <= x[i] < N[i]

	Solve via depth-first Euclid's algorithm, as explained in [1]

	References
	----------
	.. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
	a System of Linear Diophantine Equations with Bounded Variables''.
	Algorithmic Number Theory, Lecture Notes in Computer Science 4076,
	182-192 (2006). doi:10.1007/11792086_14

	"""
	assert all(n > 0 for n in N)
	assert all(a > 0 for a in A)
	assert len(A) == len(N)

	if len(A) == 1:
	if N[0] > 0:
	return (0,)
	else:
	return None
	elif len(A) == 0:
	return None

	L = [0]*len(A)
	U = [n - 1 for n in N]

	Ap, Lp, Up, Gamma, Epsilon = _ramachandran_pre(A, L, U, d)
	res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
	d, len(A)-1, count)
	if res is not None:
	# check solution
	assert sum(a*x for a, x in zip(A, res)) == d, (A, N, d, res)
	assert all(0 <= x < n for n, x in zip(N, res)), (A, N, d, res)

	return res


	def _ramachandran_pre(A, L, U, d):
	"""
	Reduce problem to
	"""
	a_gcd, gamma, epsilon = euclid(A[0], A[1])
	c1 = A[0]//a_gcd
	c2 = A[1]//a_gcd

	Ap = [0]*(len(A)-1)
	Gamma = [0]*(len(A)-1)
	Epsilon = [0]*(len(A)-1)
	Lp = [0]*(len(A)-2)
	Up = [0]*(len(A)-2)

	Ap[0] = a_gcd
	Gamma[0] = gamma
	Epsilon[0] = epsilon

	if len(A) > 2:
	Lp[0] = L[0]c1 + L[1]c2
	Up[0] = U[0]c1 + U[1]c2

	for j in xrange(2, len(A)):
	a_gcd, gamma, epsilon = euclid(Ap[j-2], A[j])

	c1 = Ap[j-2]//a_gcd
	c2 = A[j]//a_gcd

	Ap[j-1] = a_gcd
	Gamma[j-1] = gamma
	Epsilon[j-1] = epsilon

	if j < len(A) - 1:
	Lp[j-1] = c1 * Lp[j-2] + c2 * L[j]
	Up[j-1] = c1 * Up[j-2] + c2 * U[j]

	return Ap, Lp, Up, Gamma, Epsilon


	def _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon, b, v, count):
	"""
	Algorithm in [1]
	"""
	assert v >= 1

	if count is not None:
	count[0] += 1

	ap = Ap[v-1]
	gamma = Gamma[v-1]
	epsilon = Epsilon[v-1]

	if v == 1:
	a1 = A[0]
	l1 = L[0]
	u1 = U[0]
	else:
	a1 = Ap[v-2]
	l1 = Lp[v-2]
	u1 = Up[v-2]

	a2 = A[v]
	l2 = L[v]
	u2 = U[v]

	# Generate set of allowed solutions
	c, r = divmod(b, ap)
	if r != 0:
	return None

	x1 = gamma * c
	x2 = epsilon * c

	c1 = a2//ap
	c2 = a1//ap

	t_l = max(_ceildiv((l1 - x1), c1), _ceildiv((x2 - u2), c2))
	t_u = min((u1 - x1)//c1, (x2 - l2)//c2)

	if v == 1:
	if t_u >= t_l:
	x1p = x1 + c1*t_l
	x2p = x2 - c2*t_l
	return (x1p, x2p)
	else:
	return None

	for t in xrange(t_l, t_u + 1):
	x1p = x1 + c1*t
	x2p = x2 - c2*t
	res = _ramachandran_dfs(A, L, U, Ap, Lp, Up, Gamma, Epsilon,
	b - a2*x2p, v - 1, count)
	if res is not None:
	return res + (x2p,)
	return None
	from numpy.distutils.core import Extension, setup
	from Cython.Build import cythonize

	extensions = [
	Extension('_slds', sources=['_slds.pyx', 'slds.c'])
	]

	setup(
	name='ramachandran',
	ext_modules=cythonize(extensions),
	)
	/*
	Solving a bounded Diophantine equation with positive coefficients.

	Copyright (c) 2015 Pauli Virtanen
	All rights reserved.
	Licensed under 3-clause BSD license, see LICENSE.txt.
	*/
	#include <stdlib.h>
	#include <stdio.h>
	#include <assert.h>
	#include <time.h>


	#include "slds.h"


	#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
	#define MIN(a, b) (((a) <= (b)) ? (a) : (b))


	/* Integer addition with overflow checking */
	static slds_int_t
	safe_add(slds_int_t a, slds_int_t b, char *overflow_flag)
	{
	if (a > 0 && b > SLDS_INT_MAX - a) {
	*overflow_flag = 1;
	}
	else if (a < 0 && b < SLDS_INT_MIN - a) {
	*overflow_flag = 1;
	}
	return a + b;
	}


	/* Integer subtraction with overflow checking */
	static slds_int_t
	safe_sub(slds_int_t a, slds_int_t b, char *overflow_flag)
	{
	if (a > 0 && b < a - SLDS_INT_MAX) {
	*overflow_flag = 1;
	}
	else if (a < 0 && b > a - SLDS_INT_MIN) {
	*overflow_flag = 1;
	}
	return a - b;
	}


	/* Integer multiplication with overflow checking */
	static slds_int_t
	safe_mul(slds_int_t a, slds_int_t b, char *overflow_flag)
	{
	if (a > 0) {
	if (b > SLDS_INT_MAX / a \|\| b < SLDS_INT_MIN / a) {
	*overflow_flag = 1;
	}
	}
	else if (a < 0) {
	if (b > 0 && a < SLDS_INT_MIN / b) {
	*overflow_flag = 1;
	}
	else if (b < 0 && a < SLDS_INT_MAX / b) {
	*overflow_flag = 1;
	}
	}
	return a * b;
	}


	/* Integer division with overflow checking */
	static slds_int_t
	safe_div(slds_int_t a, slds_int_t b, char *overflow_flag)
	{
	if (b == 0 \|\| (b == -1 && a <= SLDS_INT_MIN)) {
	*overflow_flag = 1;
	}
	return a / b;
	}


	/* Divide and round down (positive divisor; no overflows) */
	static slds_int_t
	floordiv(slds_int_t a, slds_int_t b)
	{
	assert(b > 0);

	/* C division truncates */
	if (a > 0) {
	return a / b;
	}
	else {
	slds_int_t v, r;
	v = a / b;
	r = a % b;
	if (r != 0) {
	--v; /* cannot overflow */
	}
	return v;
	}
	}

	/* Divide and round up (positive divisor; no overflows) */
	static slds_int_t
	ceildiv(slds_int_t a, slds_int_t b)
	{
	assert(b > 0);

	if (a < 0) {
	return a / b;
	}
	else {
	slds_int_t v, r;
	v = a / b;
	r = a % b;
	if (r != 0) {
	++v; /* cannot overflow */
	}
	return v;
	}
	}


	/*
	* Euclid's algorithm for GCD.
	*
	* Solves for gammaa1 + epsilona2 == gcd(a1, a2)
	* providing \|gamma\| < \|a2\|/gcd, \|epsilon\| < \|a1\|/gcd.
	*/
	static void
	euclid(slds_int_t a1, slds_int_t a2, slds_int_t a_gcd, slds_int_t gamma, slds_int_t *epsilon)
	{
	slds_int_t gamma1, gamma2, epsilon1, epsilon2, r;

	assert(a1 > 0);
	assert(a2 > 0);

	gamma1 = 1;
	gamma2 = 0;
	epsilon1 = 0;
	epsilon2 = 1;

	/* The numbers remain bounded by \|a1\|, \|a2\| during
	the iteration, so no integer overflows */
	while (1) {
	if (a2 > 0) {
	r = a1/a2;
	a1 -= r*a2;
	gamma1 -= r*gamma2;
	epsilon1 -= r*epsilon2;
	}
	else {
	*a_gcd = a1;
	*gamma = gamma1;
	*epsilon = epsilon1;
	break;
	}

	if (a1 > 0) {
	r = a2/a1;
	a2 -= r*a1;
	gamma2 -= r*gamma1;
	epsilon2 -= r*epsilon1;
	}
	else {
	*a_gcd = a2;
	*gamma = gamma2;
	*epsilon = epsilon2;
	break;
	}
	}
	}


	/*
	* Precompute GCD and bounds transformations
	*/
	static int
	slds_precompute(unsigned int n,
	slds_entry_t *E,
	slds_entry_t *Ep,
	slds_int_t Gamma, slds_int_t Epsilon)
	{
	slds_int_t a_gcd, gamma, epsilon, c1, c2;
	unsigned int j;
	char overflow = 0;

	assert(n >= 2);

	euclid(E[0].a, E[1].a, &a_gcd, &gamma, &epsilon);
	Ep[0].a = a_gcd;
	Gamma[0] = gamma;
	Epsilon[0] = epsilon;

	if (n > 2) {
	c1 = E[0].a / a_gcd;
	c2 = E[1].a / a_gcd;

	/* Ep[0].lb = E[0].lb * c1 + E[1].lb * c2; */
	Ep[0].lb = safe_add(safe_mul(E[0].lb, c1, &overflow),
	safe_mul(E[1].lb, c2, &overflow), &overflow);
	/* Ep[0].ub = E[0].ub * c1 + E[1].ub * c2; */
	Ep[0].ub = safe_add(safe_mul(E[0].ub, c1, &overflow),
	safe_mul(E[1].ub, c2, &overflow), &overflow);
	if (overflow) {
	return 1;
	}
	}

	for (j = 2; j < n; ++j) {
	euclid(Ep[j-2].a, E[j].a, &a_gcd, &gamma, &epsilon);
	Ep[j-1].a = a_gcd;
	Gamma[j-1] = gamma;
	Epsilon[j-1] = epsilon;

	if (j < n - 1) {
	c1 = Ep[j-2].a / a_gcd;
	c2 = E[j].a / a_gcd;

	/* Ep[j-1].lb = c1 * Ep[j-2].lb + c2 * E[j].lb; */
	Ep[j-1].lb = safe_add(safe_mul(c1, Ep[j-2].lb, &overflow),
	safe_mul(c2, E[j].lb, &overflow), &overflow);

	/* Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub; */
	Ep[j-1].ub = safe_add(safe_mul(c1, Ep[j-2].ub, &overflow),
	safe_mul(c2, E[j].ub, &overflow), &overflow);

	if (overflow) {
	return 1;
	}
	}
	}

	return 0;
	}


	/*
	* Depth-first bounded Euclid search
	*/
	static slds_status_t
	slds_dfs(unsigned int v,
	slds_entry_t *E,
	slds_entry_t *Ep,
	slds_int_t Gamma, slds_int_t Epsilon,
	slds_int_t b,
	unsigned long max_work,
	slds_int_t *x,
	unsigned long *count)
	{
	slds_int_t a_gcd, gamma, epsilon, a1, l1, u1, a2, l2, u2, c, r, x1, x2, c1, c2, t_l, t_u, t, b2;
	slds_status_t res;
	char overflow = 0;

	if (max_work > 0 && *count >= max_work) {
	return TOO_HARD;
	}
	++*count;

	/* Fetch precomputed values for the reduced problem */
	if (v == 1) {
	a1 = E[0].a;
	l1 = E[0].lb;
	u1 = E[0].ub;
	}
	else {
	a1 = Ep[v-2].a;
	l1 = Ep[v-2].lb;
	u1 = Ep[v-2].ub;
	}

	a2 = E[v].a;
	l2 = E[v].lb;
	u2 = E[v].ub;

	a_gcd = Ep[v-1].a;
	gamma = Gamma[v-1];
	epsilon = Epsilon[v-1];

	/* Generate set of allowed solutions */
	c = b / a_gcd;
	r = b % a_gcd;
	if (r != 0) {
	return INFEASIBLE;
	}

	x1 = safe_mul(gamma, c, &overflow);
	x2 = safe_mul(epsilon, c, &overflow);

	c1 = a2 / a_gcd;
	c2 = a1 / a_gcd;

	t_l = MAX(ceildiv(safe_sub(l1, x1, &overflow), c1),
	ceildiv(safe_sub(x2, u2, &overflow), c2));
	t_u = MIN(floordiv(safe_sub(u1, x1, &overflow), c1),
	floordiv(safe_sub(x2, l2, &overflow), c2));

	if (overflow) {
	return INTEGER_OVERFLOW;
	}

	/* The bounds t_l, t_u ensure the x computed below do not overflow */

	if (v == 1) {
	/* Base case */
	if (t_u >= t_l) {
	x[0] = x1 + c1*t_l;
	x[1] = x2 - c2*t_l;
	return FEASIBLE;
	}
	return INFEASIBLE;
	}
	else {
	/* Recurse to all candidates */
	for (t = t_l; t <= t_u; ++t) {
	x[v] = x2 - c2*t;

	/* b2 = b - a2x[v]; /
	b2 = safe_sub(b, safe_mul(a2, x[v], &overflow), &overflow);
	if (overflow) {
	return INTEGER_OVERFLOW;
	}

	res = slds_dfs(v-1, E, Ep, Gamma, Epsilon,
	b2, max_work, x, count);
	if (res != INFEASIBLE) {
	return res;
	}
	}
	return INFEASIBLE;
	}
	}


	/*
	* Solve bounded Diophantine equation
	*
	* The problem considered is::
	*
	* A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
	* L[i] <= x[i] <= U[i]
	* A[i] > 0
	*
	* Solve via depth-first Euclid's algorithm, as explained in [1]
	*
	* References
	* ----------
	* .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving
	* a System of Linear Diophantine Equations with Bounded Variables''.
	* Algorithmic Number Theory, Lecture Notes in Computer Science 4076,
	* 182-192 (2006). doi:10.1007/11792086_14
	*/
	slds_status_t
	slds(unsigned int n, slds_entry_t E, slds_int_t b, unsigned long max_work, slds_int_t x)
	{
	unsigned int j;

	for (j = 0; j < n; ++j) {
	if (E[j].a <= 0) {
	return ERROR;
	}
	}

	if (n <= 0) {
	return INFEASIBLE;
	}
	else if (n == 1) {
	if (b % E[0].a == 0) {
	x[0] = b / E[0].a;
	if (x[0] >= E[0].lb && x[0] <= E[0].ub) {
	return FEASIBLE;
	}
	}
	return INFEASIBLE;
	}
	else {
	slds_entry_t Ep[n];
	slds_int_t Epsilon[n], Gamma[n];
	unsigned long count = 0;

	if (slds_precompute(n, E, Ep, Gamma, Epsilon)) {
	return INTEGER_OVERFLOW;
	}
	return slds_dfs(n-1, E, Ep, Gamma, Epsilon, b, max_work, x, &count);
	}
	}


	static int
	slds_sort_cmp(const void xp, const void yp)
	{
	return ((slds_entry_t)yp)->a - ((slds_entry_t)xp)->a;
	}


	/*
	* Sort big coefficients first, apparently typically reduces search space
	*/
	void
	slds_sort(unsigned int n, slds_entry_t *E)
	{
	qsort(E, n, sizeof(slds_entry_t), slds_sort_cmp);
	}


	#ifdef TEST

	#include <limits.h>

	int main()
	{
	#define n 2
	slds_entry_t E[n] = {
	{2, -1, SHRT_MAX},
	{3, -1, SHRT_MAX}
	};
	slds_int_t x[n];
	slds_int_t b = SHRT_MAX-7;
	unsigned long max_work = 0;
	slds_int_t sum;
	int j;
	slds_status_t res;
	clock_t start, end;
	unsigned long runcount = 0;

	slds_int_t gcd, gamma, epsilon;

	slds_sort(n, E);

	start = clock();
	while (1) {
	res = slds(n, E, b, max_work, x);
	end = clock();
	runcount += 1;
	if (end - start > 0.5*CLOCKS_PER_SEC) {
	break;
	}
	}
	printf("timing: %g us per run\n", (end - start)*1e6/CLOCKS_PER_SEC/runcount);

	if (res == FEASIBLE) {
	printf("problem is feasible:\n\n");
	sum = 0;
	printf("x = ");
	for (j = 0; j < n; ++j) {
	if (j > 0) {
	printf(", ");
	}
	printf("%ld", (long)x[j]);
	}
	printf("\n");
	for (j = 0; j < n; ++j) {
	if (j > 0) {
	printf(" + ");
	}
	printf("%ld*%ld", (long)E[j].a, (long)x[j]);
	sum += E[j].a*x[j];
	}
	printf(" = %ld = %ld", (long)sum, (long)b);
	if (sum == b) {
	printf(" OK\n");
	}
	else {
	printf(" FAIL\n");
	}
	for (j = 0; j < n; ++j) {
	printf("%ld <= %ld <= %ld", (long)E[j].lb, (long)x[j], (long)E[j].ub);
	if (E[j].lb <= x[j] && x[j] <= E[j].ub) {
	printf(" OK\n");
	}
	else {
	printf(" FAIL\n");
	}
	}
	return 0;
	}
	else if (res == INFEASIBLE) {
	printf("infeasible\n");
	}
	else if (res == ERROR) {
	printf("error\n");
	}
	else if (res == TOO_HARD) {
	printf("too much work done\n");
	}
	else if (res == INTEGER_OVERFLOW) {
	printf("integer overflow\n");
	}
	return 1;
	}


	#endif
	#ifndef SLDS_H_
	#define SLDS_H_

	#include <limits.h>


	typedef long slds_int_t;

	#define SLDS_INT_MAX LONG_MAX
	#define SLDS_INT_MIN LONG_MIN


	typedef enum {
	FEASIBLE = 0,
	INFEASIBLE,
	TOO_HARD,
	INTEGER_OVERFLOW,
	ERROR
	} slds_status_t;


	typedef struct {
	slds_int_t a;
	slds_int_t lb;
	slds_int_t ub;
	unsigned int j;
	} slds_entry_t;


	slds_status_t slds(unsigned int n, slds_entry_t E, slds_int_t b, unsigned long max_work, slds_int_t x);

	void slds_sort(unsigned int n, slds_entry_t *E);

	#endif