chaoxu/README.MD

## README.MD

      
    Raw
  

              README.MD
            
          
    This is a python implementation of a subset sum algorithm by Konstantinos Koiliaris and me.
Given a set of n positive integers, the algorithm finds all the values t<=u such that
there exists a subset that sums to t.
The running time is O(sqrt(n)u) with some extra polylog factors.
There is also a C++ implementation of the standard dynamic programming algorithm for subset sum.
The DP table gets packed into 64 bit unsigned integers.
It's probably the fastest possibe implementation of the O(nu) time algorithm without getting into ASM and parallelism.
Notes:

There is a small difference in the implementation described in the paper.
There is no exponential interval partition before applying the recursive algorithm on each interval.
A careful analysis shows the running time is the same.
It should be clear the algorithm is not practical at all.
Some engineering is possible. For example, in lower levels of the recursion, use the O(nu) time algorithm instead.
This randomized algorithm by Karl Bringmann might be more practical.


## subset_sum.cpp
#include <vector>
#include <stdint.h>

bool is_bit_set(uint64_t num, int position){
  return 1 == ((num >> position) & 1);
}

std::vector<int>  subset_sums_bitset(std::vector<int>& v, int u){
  int n=v.size();
  int m=1+u/64;
  uint64_t* dp  = new uint64_t[m];
  memset(dp, 0, m * sizeof dp[0]);
  dp[0] = 1;

  for(int i=0;i<n;i++){
    int k = v[i];
    if(k>u){
      continue;
    }
    int q = k/64;
    int r = k%64;
    // shift by 64 is undefined behavior :(
    if(r!=0){
      for(int j=m-1;j-q>=1;j--){
        dp[j]|= dp[j-q]<<r;
        dp[j]|= dp[j-q-1]>>(64-r);
      }
      dp[q]|= dp[0]<<r;
    }else{
      for(int j=m-1;j-q>=0;j--){
        dp[j]|= dp[j-q];
      }
    }
  }
  std::vector<int> result;
  for(int i=0;i<m;i++){
    for(int j=0;j<64;j++){
      if(is_bit_set(dp[i],j)){
        result.push_back(i*64+j);
      }
    }
  }
  delete[] dp;
  return result;
}

## subset_sum.py
from numpy import transpose, nonzero, select, zeros
from scipy.signal import fftconvolve
from operator import itemgetter
from itertools import takewhile, product

# to test, try the following
# > import random
# > xs = list(set(random.sample(xrange(10000), 500)))
# > subset_sums_up_to_u(xs,50000) == naive_subset_sums_up_to_u(xs,50000)

def naive_subset_sums_up_to_u(xs,u):
    sums = set([0])
    for x in xs:
        sums2 = set(sums)
        for y in sums:
            if x+y<u :
                sums2.add(x+y)
        sums = sums2
    return sums

# output is list of subset sums
def subset_sums_up_to_u(xs,u):
    eps = 0.0001 # account for floating error

    # list to matrix representation
    def L2M(S):
        c1 = map(itemgetter(0),S)
        c2 = map(itemgetter(1),S)
        M = zeros((max(c1)+1,max(c2)+1))
        M[c1,c2] = 1
        return M

    # matrix to list representation
    def M2L(M):
        return zip(*nonzero(select([M>eps], [1])))

    def minkowski((A,kA,aA,bA),(B,kB,aB,bB)):
        if kB == 0:
            return (A,kA,aA,bA)
        # info of the output
        k,a,b = kA+kB, min(aA,aB), max(bA,bB)
        return minkowski2(A,B,k,a,b)

    # taking minkowski sum of two sets A and B, such that
    # it is known that A, B, A+B are contained in the union of intervals
    # [i*a, i*b] for all 0<=i<=k
    def minkowski2(A,B,k,a,b):
        if a <= min(1+u//a,k)*(b-a):
            h    = lambda x     : (0,x)
            invh = lambda (_,x) : x
        else:
            h    = lambda x     : divmod(x, a)
            invh = lambda (i,j) : i*a+j
        MA, MB = L2M(map(h,A)), L2M(map(h,B))
        invMC = fftconvolve(MA, MB)
        C = takewhile(lambda x: x < u, map(invh, M2L(invMC)))
        return (C, k, a, b)

    # combine a list, where each element of the list is
    # (set of subset sums, number of generators, lower bound of generators, upper bound of generators)
    def combine(xs):
        if len(xs)==1:
            return xs[0]
        if len(xs)%2 != 0:
            xs.append(([],0,0,0))
        return combine([minkowski(*X) for X in zip(xs[0::2],xs[1::2])])

    ys = [([0,x],1,x,x) for x in sorted(xs) if x<u and x>0]
    return combine(ys)[0]
	#include <vector>
	#include <stdint.h>

	bool is_bit_set(uint64_t num, int position){
	return 1 == ((num >> position) & 1);
	}

	std::vector<int> subset_sums_bitset(std::vector<int>& v, int u){
	int n=v.size();
	int m=1+u/64;
	uint64_t* dp = new uint64_t[m];
	memset(dp, 0, m * sizeof dp[0]);
	dp[0] = 1;

	for(int i=0;i<n;i++){
	int k = v[i];
	if(k>u){
	continue;
	}
	int q = k/64;
	int r = k%64;
	// shift by 64 is undefined behavior :(
	if(r!=0){
	for(int j=m-1;j-q>=1;j--){
	dp[j]\|= dp[j-q]<<r;
	dp[j]\|= dp[j-q-1]>>(64-r);
	}
	dp[q]\|= dp[0]<<r;
	}else{
	for(int j=m-1;j-q>=0;j--){
	dp[j]\|= dp[j-q];
	}
	}
	}
	std::vector<int> result;
	for(int i=0;i<m;i++){
	for(int j=0;j<64;j++){
	if(is_bit_set(dp[i],j)){
	result.push_back(i*64+j);
	}
	}
	}
	delete[] dp;
	return result;
	}
	from numpy import transpose, nonzero, select, zeros
	from scipy.signal import fftconvolve
	from operator import itemgetter
	from itertools import takewhile, product

	# to test, try the following
	# > import random
	# > xs = list(set(random.sample(xrange(10000), 500)))
	# > subset_sums_up_to_u(xs,50000) == naive_subset_sums_up_to_u(xs,50000)

	def naive_subset_sums_up_to_u(xs,u):
	sums = set([0])
	for x in xs:
	sums2 = set(sums)
	for y in sums:
	if x+y<u :
	sums2.add(x+y)
	sums = sums2
	return sums

	# output is list of subset sums
	def subset_sums_up_to_u(xs,u):
	eps = 0.0001 # account for floating error

	# list to matrix representation
	def L2M(S):
	c1 = map(itemgetter(0),S)
	c2 = map(itemgetter(1),S)
	M = zeros((max(c1)+1,max(c2)+1))
	M[c1,c2] = 1
	return M

	# matrix to list representation
	def M2L(M):
	return zip(*nonzero(select([M>eps], [1])))

	def minkowski((A,kA,aA,bA),(B,kB,aB,bB)):
	if kB == 0:
	return (A,kA,aA,bA)
	# info of the output
	k,a,b = kA+kB, min(aA,aB), max(bA,bB)
	return minkowski2(A,B,k,a,b)

	# taking minkowski sum of two sets A and B, such that
	# it is known that A, B, A+B are contained in the union of intervals
	# [ia, ib] for all 0<=i<=k
	def minkowski2(A,B,k,a,b):
	if a <= min(1+u//a,k)*(b-a):
	h = lambda x : (0,x)
	invh = lambda (_,x) : x
	else:
	h = lambda x : divmod(x, a)
	invh = lambda (i,j) : i*a+j
	MA, MB = L2M(map(h,A)), L2M(map(h,B))
	invMC = fftconvolve(MA, MB)
	C = takewhile(lambda x: x < u, map(invh, M2L(invMC)))
	return (C, k, a, b)

	# combine a list, where each element of the list is
	# (set of subset sums, number of generators, lower bound of generators, upper bound of generators)
	def combine(xs):
	if len(xs)==1:
	return xs[0]
	if len(xs)%2 != 0:
	xs.append(([],0,0,0))
	return combine([minkowski(*X) for X in zip(xs[0::2],xs[1::2])])

	ys = [([0,x],1,x,x) for x in sorted(xs) if x<u and x>0]
	return combine(ys)[0]