# xziyue/correlation_comp.py

## correlation_comp.py
import itertools
import numpy as np


class Calculator:
    """Build a table of nested "correlation" expressions over variable combinations.

    Each combination of variable indices is identified by an integer bitmask
    (bit ``i`` set means variable ``i`` belongs to the combination).  ``run``
    fills ``comp_table`` bottom-up: all pairs first, then every larger
    combination from the stored values of its one-element-smaller
    sub-combinations.
    """

    def __init__(self, vars):
        """Store the variables and precompute one bit value per variable.

        Args:
            vars: Sequence of variables; it must support ``len()`` and
                integer indexing.
        """
        self.vars = vars
        self.num_vars = len(vars)
        self.var_index = np.arange(self.num_vars, dtype=np.uint64)
        # Exact Python integers in an object array: fixed-width uint64 powers
        # wrap around from bit 64 on, which made distinct combinations collide.
        self.hasher = np.array(
            [1 << bit for bit in range(self.num_vars)], dtype=object
        )
        # No longer used by hash_vars; kept so code that reads the attribute
        # keeps working.
        self.hash_mask = np.zeros(self.num_vars, dtype=np.uint64)
        # maps a combination's bitmask key to its computed value
        self.comp_table = dict()

    def hash_vars(self, var_inds):
        """Return the unique integer key of a combination of variable indices.

        The key is the bitwise OR of ``self.hasher[ind]`` for every ``ind`` in
        *var_inds*, so the order of the indices and repeated indices do not
        change the result.  The key is a plain Python ``int`` and therefore
        has no 64-variable ceiling.

        Args:
            var_inds: Iterable of integer indices into ``self.vars``.

        Returns:
            The bitmask identifying the combination (``0`` for no indices).
        """
        key = 0
        for ind in var_inds:
            key |= self.hasher[ind]
        return key

    def inv_hash(self, hash_val):
        """Return the variable indices encoded in *hash_val*, in ascending order.

        Args:
            hash_val: Key previously produced by ``hash_vars``.

        Returns:
            List of bit positions that are set in *hash_val*.
        """
        # Slice off the '0b' prefix explicitly; str.lstrip('0b') strips a
        # *character set*, which only happened to give the same result.
        bits = bin(hash_val)[2:]
        # Reversed so that position 0 is the least significant bit.
        return [pos for pos, bit in enumerate(reversed(bits)) if bit == '1']

    @staticmethod
    def correlation(v1, v2):
        """Dummy correlation: return the symbolic string ``'(v1@v2)'``."""
        return f'({v1}@{v2})'

    @staticmethod
    def weight(vals):
        """Dummy weighting: wrap the comma-joined strings in *vals* as ``'w(...)'``."""
        return 'w({})'.format(','.join(vals))

    def run(self, k=None):
        """Fill ``comp_table`` for every combination of 2 up to *k* variables.

        Args:
            k: Largest combination size to compute; defaults to the number of
                variables.  Must satisfy ``2 <= k <= len(vars)``.

        Raises:
            AssertionError: If *k* is outside the allowed range.
        """
        if k is None:
            k = self.num_vars
        # Left as an assertion so the exception type seen by callers is unchanged.
        assert 2 <= k <= self.num_vars

        positions = range(self.num_vars)

        # Base case: a pair is the direct correlation of its two variables.
        for first, second in itertools.combinations(positions, 2):
            pair_key = self.hash_vars((first, second))
            self.comp_table[pair_key] = self.correlation(
                self.vars[first], self.vars[second]
            )

        # Sizes are processed in increasing order, so the value of every
        # (size - 1)-combination is already stored when it is looked up.
        for size in range(3, k + 1):
            for comb in itertools.combinations(positions, size):
                vals = []
                # One-vs-rest: pair each member with the value of the others.
                for pos, head in enumerate(comb):
                    rest = comb[:pos] + comb[pos + 1:]
                    rest_value = self.comp_table[self.hash_vars(rest)]
                    vals.append(self.correlation(self.vars[head], rest_value))
                self.comp_table[self.hash_vars(comb)] = self.weight(vals)


# five placeholder variable names, v1 .. v5
sample_vars = [f'v{i}' for i in range(1, 6)]
# build the calculator and fill its table for combinations of up to 4 variables
calc = Calculator(sample_vars)
calc.run(k=4)
# print one line per table entry, decoding each key back into variable names
for key, val in calc.comp_table.items():
    names = (sample_vars[i] for i in calc.inv_hash(key))
    var_list = ','.join(names)
    print(f'c({var_list})={val}')


"""
c(v1,v2)=(v1@v2)
c(v1,v3)=(v1@v3)
c(v1,v4)=(v1@v4)
c(v1,v5)=(v1@v5)
c(v2,v3)=(v2@v3)
c(v2,v4)=(v2@v4)
c(v2,v5)=(v2@v5)
c(v3,v4)=(v3@v4)
c(v3,v5)=(v3@v5)
c(v4,v5)=(v4@v5)
c(v1,v2,v3)=w((v1@c(v2,v3)),(v2@c(v1,v3)),(v3@c(v1,v2)))
c(v1,v2,v4)=w((v1@c(v2,v4)),(v2@c(v1,v4)),(v4@c(v1,v2)))
c(v1,v2,v5)=w((v1@c(v2,v5)),(v2@c(v1,v5)),(v5@c(v1,v2)))
c(v1,v3,v4)=w((v1@c(v3,v4)),(v3@c(v1,v4)),(v4@c(v1,v3)))
c(v1,v3,v5)=w((v1@c(v3,v5)),(v3@c(v1,v5)),(v5@c(v1,v3)))
c(v1,v4,v5)=w((v1@c(v4,v5)),(v4@c(v1,v5)),(v5@c(v1,v4)))
c(v2,v3,v4)=w((v2@c(v3,v4)),(v3@c(v2,v4)),(v4@c(v2,v3)))
c(v2,v3,v5)=w((v2@c(v3,v5)),(v3@c(v2,v5)),(v5@c(v2,v3)))
c(v2,v4,v5)=w((v2@c(v4,v5)),(v4@c(v2,v5)),(v5@c(v2,v4)))
c(v3,v4,v5)=w((v3@c(v4,v5)),(v4@c(v3,v5)),(v5@c(v3,v4)))
c(v1,v2,v3,v4)=w((v1@c(v2,v3,v4)),(v2@c(v1,v3,v4)),(v3@c(v1,v2,v4)),(v4@c(v1,v2,v3)))
c(v1,v2,v3,v5)=w((v1@c(v2,v3,v5)),(v2@c(v1,v3,v5)),(v3@c(v1,v2,v5)),(v5@c(v1,v2,v3)))
c(v1,v2,v4,v5)=w((v1@c(v2,v4,v5)),(v2@c(v1,v4,v5)),(v4@c(v1,v2,v5)),(v5@c(v1,v2,v4)))
c(v1,v3,v4,v5)=w((v1@c(v3,v4,v5)),(v3@c(v1,v4,v5)),(v4@c(v1,v3,v5)),(v5@c(v1,v3,v4)))
c(v2,v3,v4,v5)=w((v2@c(v3,v4,v5)),(v3@c(v2,v4,v5)),(v4@c(v2,v3,v5)),(v5@c(v2,v3,v4)))
"""
	import itertools
	import numpy as np


	class Calculator:
	    """Build a table of nested "correlation" expressions over variable combinations.

	    Each combination of variable indices is identified by an integer bitmask
	    (bit ``i`` set means variable ``i`` belongs to the combination).  ``run``
	    fills ``comp_table`` bottom-up: all pairs first, then every larger
	    combination from the stored values of its one-element-smaller
	    sub-combinations.
	    """

	    def __init__(self, vars):
	        """Store the variables and precompute one bit value per variable.

	        Args:
	            vars: Sequence of variables; it must support ``len()`` and
	                integer indexing.
	        """
	        self.vars = vars
	        self.num_vars = len(vars)
	        self.var_index = np.arange(self.num_vars, dtype=np.uint64)
	        # Exact Python integers in an object array: fixed-width uint64 powers
	        # wrap around from bit 64 on, which made distinct combinations collide.
	        self.hasher = np.array(
	            [1 << bit for bit in range(self.num_vars)], dtype=object
	        )
	        # No longer used by hash_vars; kept so code that reads the attribute
	        # keeps working.
	        self.hash_mask = np.zeros(self.num_vars, dtype=np.uint64)
	        # maps a combination's bitmask key to its computed value
	        self.comp_table = dict()

	    def hash_vars(self, var_inds):
	        """Return the unique integer key of a combination of variable indices.

	        The key is the bitwise OR of ``self.hasher[ind]`` for every ``ind`` in
	        *var_inds*, so the order of the indices and repeated indices do not
	        change the result.  The key is a plain Python ``int`` and therefore
	        has no 64-variable ceiling.

	        Args:
	            var_inds: Iterable of integer indices into ``self.vars``.

	        Returns:
	            The bitmask identifying the combination (``0`` for no indices).
	        """
	        key = 0
	        for ind in var_inds:
	            key |= self.hasher[ind]
	        return key

	    def inv_hash(self, hash_val):
	        """Return the variable indices encoded in *hash_val*, in ascending order.

	        Args:
	            hash_val: Key previously produced by ``hash_vars``.

	        Returns:
	            List of bit positions that are set in *hash_val*.
	        """
	        # Slice off the '0b' prefix explicitly; str.lstrip('0b') strips a
	        # *character set*, which only happened to give the same result.
	        bits = bin(hash_val)[2:]
	        # Reversed so that position 0 is the least significant bit.
	        return [pos for pos, bit in enumerate(reversed(bits)) if bit == '1']

	    @staticmethod
	    def correlation(v1, v2):
	        """Dummy correlation: return the symbolic string ``'(v1@v2)'``."""
	        return f'({v1}@{v2})'

	    @staticmethod
	    def weight(vals):
	        """Dummy weighting: wrap the comma-joined strings in *vals* as ``'w(...)'``."""
	        return 'w({})'.format(','.join(vals))

	    def run(self, k=None):
	        """Fill ``comp_table`` for every combination of 2 up to *k* variables.

	        Args:
	            k: Largest combination size to compute; defaults to the number of
	                variables.  Must satisfy ``2 <= k <= len(vars)``.

	        Raises:
	            AssertionError: If *k* is outside the allowed range.
	        """
	        if k is None:
	            k = self.num_vars
	        # Left as an assertion so the exception type seen by callers is unchanged.
	        assert 2 <= k <= self.num_vars

	        positions = range(self.num_vars)

	        # Base case: a pair is the direct correlation of its two variables.
	        for first, second in itertools.combinations(positions, 2):
	            pair_key = self.hash_vars((first, second))
	            self.comp_table[pair_key] = self.correlation(
	                self.vars[first], self.vars[second]
	            )

	        # Sizes are processed in increasing order, so the value of every
	        # (size - 1)-combination is already stored when it is looked up.
	        for size in range(3, k + 1):
	            for comb in itertools.combinations(positions, size):
	                vals = []
	                # One-vs-rest: pair each member with the value of the others.
	                for pos, head in enumerate(comb):
	                    rest = comb[:pos] + comb[pos + 1:]
	                    rest_value = self.comp_table[self.hash_vars(rest)]
	                    vals.append(self.correlation(self.vars[head], rest_value))
	                self.comp_table[self.hash_vars(comb)] = self.weight(vals)


	# dummy variables for illustration
	sample_vars = [f'v{i}' for i in range(1, 6)]
	# initialize the class
	calc = Calculator(sample_vars)
	# run the dummy computation for combinations of up to 4 variables
	calc.run(k=4)
	# print one line per table entry, decoding each key back into variable names
	for key, val in calc.comp_table.items():
	    indices = calc.inv_hash(key)
	    var_list = ','.join([sample_vars[i] for i in indices])
	    print(f'c({var_list})={val}')


	"""
	c(v1,v2)=(v1@v2)
	c(v1,v3)=(v1@v3)
	c(v1,v4)=(v1@v4)
	c(v1,v5)=(v1@v5)
	c(v2,v3)=(v2@v3)
	c(v2,v4)=(v2@v4)
	c(v2,v5)=(v2@v5)
	c(v3,v4)=(v3@v4)
	c(v3,v5)=(v3@v5)
	c(v4,v5)=(v4@v5)
	c(v1,v2,v3)=w((v1@c(v2,v3)),(v2@c(v1,v3)),(v3@c(v1,v2)))
	c(v1,v2,v4)=w((v1@c(v2,v4)),(v2@c(v1,v4)),(v4@c(v1,v2)))
	c(v1,v2,v5)=w((v1@c(v2,v5)),(v2@c(v1,v5)),(v5@c(v1,v2)))
	c(v1,v3,v4)=w((v1@c(v3,v4)),(v3@c(v1,v4)),(v4@c(v1,v3)))
	c(v1,v3,v5)=w((v1@c(v3,v5)),(v3@c(v1,v5)),(v5@c(v1,v3)))
	c(v1,v4,v5)=w((v1@c(v4,v5)),(v4@c(v1,v5)),(v5@c(v1,v4)))
	c(v2,v3,v4)=w((v2@c(v3,v4)),(v3@c(v2,v4)),(v4@c(v2,v3)))
	c(v2,v3,v5)=w((v2@c(v3,v5)),(v3@c(v2,v5)),(v5@c(v2,v3)))
	c(v2,v4,v5)=w((v2@c(v4,v5)),(v4@c(v2,v5)),(v5@c(v2,v4)))
	c(v3,v4,v5)=w((v3@c(v4,v5)),(v4@c(v3,v5)),(v5@c(v3,v4)))
	c(v1,v2,v3,v4)=w((v1@c(v2,v3,v4)),(v2@c(v1,v3,v4)),(v3@c(v1,v2,v4)),(v4@c(v1,v2,v3)))
	c(v1,v2,v3,v5)=w((v1@c(v2,v3,v5)),(v2@c(v1,v3,v5)),(v3@c(v1,v2,v5)),(v5@c(v1,v2,v3)))
	c(v1,v2,v4,v5)=w((v1@c(v2,v4,v5)),(v2@c(v1,v4,v5)),(v4@c(v1,v2,v5)),(v5@c(v1,v2,v4)))
	c(v1,v3,v4,v5)=w((v1@c(v3,v4,v5)),(v3@c(v1,v4,v5)),(v4@c(v1,v3,v5)),(v5@c(v1,v3,v4)))
	c(v2,v3,v4,v5)=w((v2@c(v3,v4,v5)),(v3@c(v2,v4,v5)),(v4@c(v2,v3,v5)),(v5@c(v2,v3,v4)))
	"""