Skip to content

Instantly share code, notes, and snippets.

@xdavio
Last active October 6, 2016 15:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xdavio/c90347adda522ba784e4e37b31d36535 to your computer and use it in GitHub Desktop.
Save xdavio/c90347adda522ba784e4e37b31d36535 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
class CorrStruct():
"""This class generates valid binary correlation matrices
"""
def __init__(self, K=10, n=100, mindistance=.9):
"""
Parameters
----------
K : int
Dimension of simulated variable.
mindistance : float
0 < mindistance < 1 must hold. A larger value means
larger marginal probabilities for the K-dimensional
binary vector.
n : int
Number of binary vectors to simulate.
"""
self.K = K
self.cube = {}
self.marginal = {}
for i in range(K):
self.cube[i] = self.make_cube(mindistance)
self.marginal[i] = self.vol_cube(self.cube[i])
self.sim = correlated_binary_rvs(*self.get_overlaps(), n)
def make_cube(self, *args):
cube = np.zeros((self.K, 2))
for i in range(self.K):
cube[i, :] = self.get_pins(*args)
return cube
def get_pins(self, mindistance=.9):
out = np.zeros(2)
while out[1] - out[0] < mindistance:
out = np.sort(np.random.random(2))
return out
def vol_cube(self, cube):
"""expects a K,2 shaped np.array
"""
return np.product(cube[:, 1] - cube[:, 0])
def get_overlap(self, i, j):
cubea = self.cube[i]
cubeb = self.cube[j]
# the logic in the next three lines is the following:
# throw two red pins and two blue pins randomly on
# (0,1). This defines a red region and a blue region.
# The following lines obtain the volume of that intersection
# and extend it to obtaining the volume of the intersection
# in the hypercube space.
lower = np.max(np.vstack((cubea[:, 0], cubeb[:, 0])), axis=0)[:, None]
upper = np.min(np.vstack((cubea[:, 1], cubeb[:, 1])), axis=0)[:, None]
return self.vol_cube(np.hstack((lower, upper)))
def get_overlaps(self):
pAB = np.zeros((self.K, self.K))
corr = np.zeros((self.K, self.K))
pA = self.marginal
for i, j in combinations(range(self.K), 2):
pAB[i, j] = self.get_overlap(i, j)
corr[i, j] = (pAB[i, j] - pA[i]*pA[j]) / np.sqrt(pA[i]*(1-pA[i])
* pA[j]*(1-pA[j]))
corr = corr + corr.T
np.fill_diagonal(corr, 1)
marginal = np.array([self.marginal[x] for x in self.marginal])
return marginal, corr
def simulate_binary(n, dims, mindistances):
"""Horizontally stacks the structure CorrStruct and returns the output
as simulated random variables.
Examples
--------
> simulate_binary(100, [5,5,5,5], [.9, .8, .7, .6])
Parameters
----------
n : int
sample size
dims : list of int
dimension of each correlation block
mindistances : list of floats or float
Larger means higher marginal probabilities. each float in (0,1).
If list, must match length of dims. If not list, then the same
marginal probability strength is used for all blocks defined by
dims.
Returns
-------
np.array
n rows by np.sum(dims) columns. Simulated data.
"""
if not (len(dims) == len(mindistances) or type(mindistances) is float):
raise Exception(('Dimension Vector should have number of '
'elements equal to the number of mindistance '
'parameters, or mindistance should have length 1'))
n_blocks = len(dims)
if type(mindistances) is float:
mindistances = np.ones(n_blocks) * mindistances
corrs = [] # instantiate correlation blocks
for i in range(n_blocks):
out = CorrStruct(dims[i], n, mindistances[i])
corrs.append(out.sim)
return np.hstack(tuple(corrs))
def corr_heat(df):
"""
References
----------
https://stanford.edu/~mwaskom/software/seaborn/examples/many_pairwise_correlations.html
"""
corr = np.cov(data.T)
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
return sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3,
square=True, xticklabels=5, yticklabels=5,
linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
data = simulate_binary(100, [5, 5, 5, 5, 5, 5, 5], .9)
corr_heat(data)
plt.show()
@xdavio
Copy link
Author

xdavio commented Oct 6, 2016

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment