Last active
February 4, 2022 06:38
-
-
Save xziyue/79cb3c3d88a4c95367b17a0ae0cc2bff to your computer and use it in GitHub Desktop.
Some random correlation?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
class Calculator:
    """Build a lookup table of nested "correlation" values for variable subsets.

    Every subset of variables is identified by a bitmask in which bit ``i`` is
    set when ``vars[i]`` belongs to the subset.  ``run`` fills ``comp_table``
    (bitmask -> value) from the bottom up: pairs first, then each larger
    combination from the stored values of the combinations one element
    smaller.

    The bitmasks are ``np.uint64`` values, so at most 64 variables are
    supported.
    """

    # number of bits available in the uint64 bitmasks used as table keys
    MAX_VARS = 64

    def __init__(self, vars):
        """Store the variables and prepare the bitmask helpers.

        Args:
            vars: sequence of variables; it must support ``len`` and integer
                indexing.

        Raises:
            ValueError: if more than ``MAX_VARS`` variables are given.  The
                powers of two would silently overflow ``uint64`` and
                different combinations would share a table key.
        """
        self.vars = vars
        self.num_vars = len(vars)
        if self.num_vars > self.MAX_VARS:
            raise ValueError(
                'at most {} variables are supported, got {}'.format(
                    self.MAX_VARS, self.num_vars
                )
            )
        self.var_index = np.arange(self.num_vars, dtype=np.uint64)
        # hasher[i] == 2**i, the bit that represents variable i
        self.hasher = np.power(2, self.var_index, dtype=np.uint64)
        # scratch buffer reused by hash_vars on every call
        self.hash_mask = np.zeros(self.num_vars, dtype=np.uint64)
        # used to store computed combinations, keyed by their bitmask
        self.comp_table = dict()

    def hash_vars(self, var_inds):
        """Map a combination of variable indices to a unique integer.

        Args:
            var_inds: iterable of indices into ``vars``.

        Returns:
            The sum of ``2**i`` over the distinct indices, as produced by
            ``np.sum`` over ``uint64`` arrays.
        """
        # overwrites the shared scratch buffer, so the result must be
        # consumed before the next call
        self.hash_mask.fill(0)
        for ind in var_inds:
            self.hash_mask[ind] = 1
        return np.sum(self.hash_mask * self.hasher)

    def inv_hash(self, hash_val):
        """Return the variable indices encoded in ``hash_val``.

        Args:
            hash_val: an integer bitmask, e.g. a key of ``comp_table``.

        Returns:
            List of the positions of the set bits, in increasing order.
        """
        # keep only the binary digits: everything after the 'b' of the
        # '0b' prefix that bin() emits (lstrip('0b') would strip a
        # character set rather than that prefix)
        digits = bin(hash_val).rpartition('b')[2]
        # the least significant bit is last in the string, hence reversed()
        return [pos for pos, bit in enumerate(reversed(digits)) if bit == '1']

    @staticmethod
    def correlation(v1, v2):
        """Dummy correlation: return the string ``(v1@v2)``.

        It only exists to make the structure of the computation visible.
        """
        return f'({v1}@{v2})'

    @staticmethod
    def weight(vals):
        """Dummy weighting: return ``w(...)`` around the comma-joined strings."""
        return 'w({})'.format(','.join(vals))

    def run(self, k=None):
        """Fill ``comp_table`` for every combination of 2 up to ``k`` variables.

        Args:
            k: largest combination size to compute; defaults to the number
                of variables.  Must satisfy ``2 <= k <= len(vars)``.

        Raises:
            AssertionError: if ``k`` is outside the allowed range.
        """
        if k is None:
            k = self.num_vars
        assert 2 <= k <= self.num_vars, (
            'k must satisfy 2 <= k <= {}, got {}'.format(self.num_vars, k)
        )

        # special case: a pair is the direct correlation of its two variables
        for v1ind, v2ind in itertools.combinations(self.var_index, r=2):
            tind = self.hash_vars((v1ind, v2ind))
            self.comp_table[tind] = self.correlation(
                self.vars[v1ind], self.vars[v2ind]
            )

        # build the rest of the table from small combinations to large ones,
        # so every (size - 1)-combination exists before it is looked up
        for size in range(3, k + 1):
            for comb in itertools.combinations(self.var_index, r=size):
                vals = []
                # one-vs-rest: pair each variable with the stored value of
                # the combination made of the remaining ones
                for comb_ind, head_ind in enumerate(comb):
                    tail_inds = comb[:comb_ind] + comb[comb_ind + 1:]
                    tail_value = self.comp_table[self.hash_vars(tail_inds)]
                    vals.append(
                        self.correlation(self.vars[head_ind], tail_value)
                    )
                # the value of the combination aggregates all those pairings
                self.comp_table[self.hash_vars(comb)] = self.weight(vals)
# placeholder variables v1..v5, used only for illustration
sample_vars = [f'v{i}' for i in range(1, 6)]

# set up the calculator over the placeholder variables
calc = Calculator(sample_vars)

# run the dummy computation for combinations of up to 4 variables
calc.run(k=4)

# print every table entry, decoding its integer key back to variable names
for key, val in calc.comp_table.items():
    indices = calc.inv_hash(key)
    var_list = ','.join(sample_vars[i] for i in indices)
    print(f'c({var_list})={val}')

# Symbolic listing of the table built above.  Each c(...) on a right-hand
# side stands for the stored value of that smaller combination; the script
# itself prints those values expanded, e.g. (v1@(v2@v3)) for (v1@c(v2,v3)).
"""
c(v1,v2)=(v1@v2)
c(v1,v3)=(v1@v3)
c(v1,v4)=(v1@v4)
c(v1,v5)=(v1@v5)
c(v2,v3)=(v2@v3)
c(v2,v4)=(v2@v4)
c(v2,v5)=(v2@v5)
c(v3,v4)=(v3@v4)
c(v3,v5)=(v3@v5)
c(v4,v5)=(v4@v5)
c(v1,v2,v3)=w((v1@c(v2,v3)),(v2@c(v1,v3)),(v3@c(v1,v2)))
c(v1,v2,v4)=w((v1@c(v2,v4)),(v2@c(v1,v4)),(v4@c(v1,v2)))
c(v1,v2,v5)=w((v1@c(v2,v5)),(v2@c(v1,v5)),(v5@c(v1,v2)))
c(v1,v3,v4)=w((v1@c(v3,v4)),(v3@c(v1,v4)),(v4@c(v1,v3)))
c(v1,v3,v5)=w((v1@c(v3,v5)),(v3@c(v1,v5)),(v5@c(v1,v3)))
c(v1,v4,v5)=w((v1@c(v4,v5)),(v4@c(v1,v5)),(v5@c(v1,v4)))
c(v2,v3,v4)=w((v2@c(v3,v4)),(v3@c(v2,v4)),(v4@c(v2,v3)))
c(v2,v3,v5)=w((v2@c(v3,v5)),(v3@c(v2,v5)),(v5@c(v2,v3)))
c(v2,v4,v5)=w((v2@c(v4,v5)),(v4@c(v2,v5)),(v5@c(v2,v4)))
c(v3,v4,v5)=w((v3@c(v4,v5)),(v4@c(v3,v5)),(v5@c(v3,v4)))
c(v1,v2,v3,v4)=w((v1@c(v2,v3,v4)),(v2@c(v1,v3,v4)),(v3@c(v1,v2,v4)),(v4@c(v1,v2,v3)))
c(v1,v2,v3,v5)=w((v1@c(v2,v3,v5)),(v2@c(v1,v3,v5)),(v3@c(v1,v2,v5)),(v5@c(v1,v2,v3)))
c(v1,v2,v4,v5)=w((v1@c(v2,v4,v5)),(v2@c(v1,v4,v5)),(v4@c(v1,v2,v5)),(v5@c(v1,v2,v4)))
c(v1,v3,v4,v5)=w((v1@c(v3,v4,v5)),(v3@c(v1,v4,v5)),(v4@c(v1,v3,v5)),(v5@c(v1,v3,v4)))
c(v2,v3,v4,v5)=w((v2@c(v3,v4,v5)),(v3@c(v2,v4,v5)),(v4@c(v2,v3,v5)),(v5@c(v2,v3,v4)))
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment