-
-
Save xdavio/c90347adda522ba784e4e37b31d36535 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CorrStruct(): | |
"""This class generates valid binary correlation matrices | |
""" | |
def __init__(self, K=10, n=100, mindistance=.9): | |
""" | |
Parameters | |
---------- | |
K : int | |
Dimension of simulated variable. | |
mindistance : float | |
0 < mindistance < 1 must hold. A larger value means | |
larger marginal probabilities for the K-dimensional | |
binary vector. | |
n : int | |
Number of binary vectors to simulate. | |
""" | |
self.K = K | |
self.cube = {} | |
self.marginal = {} | |
for i in range(K): | |
self.cube[i] = self.make_cube(mindistance) | |
self.marginal[i] = self.vol_cube(self.cube[i]) | |
self.sim = correlated_binary_rvs(*self.get_overlaps(), n) | |
def make_cube(self, *args): | |
cube = np.zeros((self.K, 2)) | |
for i in range(self.K): | |
cube[i, :] = self.get_pins(*args) | |
return cube | |
def get_pins(self, mindistance=.9): | |
out = np.zeros(2) | |
while out[1] - out[0] < mindistance: | |
out = np.sort(np.random.random(2)) | |
return out | |
def vol_cube(self, cube): | |
"""expects a K,2 shaped np.array | |
""" | |
return np.product(cube[:, 1] - cube[:, 0]) | |
def get_overlap(self, i, j): | |
cubea = self.cube[i] | |
cubeb = self.cube[j] | |
# the logic in the next three lines is the following: | |
# throw two red pins and two blue pins randomly on | |
# (0,1). This defines a red region and a blue region. | |
# The following lines obtain the volume of that intersection | |
# and extend it to obtaining the volume of the intersection | |
# in the hypercube space. | |
lower = np.max(np.vstack((cubea[:, 0], cubeb[:, 0])), axis=0)[:, None] | |
upper = np.min(np.vstack((cubea[:, 1], cubeb[:, 1])), axis=0)[:, None] | |
return self.vol_cube(np.hstack((lower, upper))) | |
def get_overlaps(self): | |
pAB = np.zeros((self.K, self.K)) | |
corr = np.zeros((self.K, self.K)) | |
pA = self.marginal | |
for i, j in combinations(range(self.K), 2): | |
pAB[i, j] = self.get_overlap(i, j) | |
corr[i, j] = (pAB[i, j] - pA[i]*pA[j]) / np.sqrt(pA[i]*(1-pA[i]) | |
* pA[j]*(1-pA[j])) | |
corr = corr + corr.T | |
np.fill_diagonal(corr, 1) | |
marginal = np.array([self.marginal[x] for x in self.marginal]) | |
return marginal, corr | |
def simulate_binary(n, dims, mindistances): | |
"""Horizontally stacks the structure CorrStruct and returns the output | |
as simulated random variables. | |
Examples | |
-------- | |
> simulate_binary(100, [5,5,5,5], [.9, .8, .7, .6]) | |
Parameters | |
---------- | |
n : int | |
sample size | |
dims : list of int | |
dimension of each correlation block | |
mindistances : list of floats or float | |
Larger means higher marginal probabilities. each float in (0,1). | |
If list, must match length of dims. If not list, then the same | |
marginal probability strength is used for all blocks defined by | |
dims. | |
Returns | |
------- | |
np.array | |
n rows by np.sum(dims) columns. Simulated data. | |
""" | |
if not (len(dims) == len(mindistances) or type(mindistances) is float): | |
raise Exception(('Dimension Vector should have number of ' | |
'elements equal to the number of mindistance ' | |
'parameters, or mindistance should have length 1')) | |
n_blocks = len(dims) | |
if type(mindistances) is float: | |
mindistances = np.ones(n_blocks) * mindistances | |
corrs = [] # instantiate correlation blocks | |
for i in range(n_blocks): | |
out = CorrStruct(dims[i], n, mindistances[i]) | |
corrs.append(out.sim) | |
return np.hstack(tuple(corrs)) | |
def corr_heat(df): | |
""" | |
References | |
---------- | |
https://stanford.edu/~mwaskom/software/seaborn/examples/many_pairwise_correlations.html | |
""" | |
corr = np.cov(data.T) | |
mask = np.zeros_like(corr, dtype=np.bool) | |
mask[np.triu_indices_from(mask)] = True | |
# Set up the matplotlib figure | |
f, ax = plt.subplots(figsize=(11, 9)) | |
# Generate a custom diverging colormap | |
cmap = sns.diverging_palette(220, 10, as_cmap=True) | |
# Draw the heatmap with the mask and correct aspect ratio | |
return sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, | |
square=True, xticklabels=5, yticklabels=5, | |
linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) | |
data = simulate_binary(100, [5, 5, 5, 5, 5, 5, 5], .9) | |
corr_heat(data) | |
plt.show() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://gist.github.com/jseabold/b91555532584a918e51b88a818003e2e