Created
October 10, 2013 23:33
-
-
Save j-faria/6927318 to your computer and use it in GitHub Desktop.
Generate data with a particular correlation matrix - resultados da primeira geek meeting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Generate data with a particular correlation matrix | |
# | |
# A useful fact is that if you have a random vector x with covariance matrix Σ, | |
# then the random vector Ax has mean AE(x) and covariance matrix Ω=AΣA^T. | |
# So, if you start with data that has mean zero, multiplying by A will not | |
# change that. | |
# | |
# Start with (mean zero) uncorrelated data (i.e. the covariance matrix is | |
# diagonal) - since we're talking about the correlation matrix, let's just take | |
# Σ=I. You can transform this to data with a given covariance matrix by choosing | |
# A to be the cholesky square root of Ω - then Ax would have the desired | |
# covariance matrix Ω. | |
from numpy import * | |
from numpy.linalg import cholesky | |
from matplotlib.pylab import * | |
from numpy.random import randn | |
n = int(raw_input("How many points? ")) | |
if n>600: print 'Are you trying to kill me here?' | |
if n>300: print 'these many might take some time, be patient!' | |
x = randn(n) # standard normally distributed values | |
# they have covariance matrix = I, right??? | |
### nice plot | |
fig = plt.figure() | |
ax1 = plt.subplot2grid((2,3), (0,0), colspan=2) | |
ax1.set_title(str(n) + ' normally distributed random values') | |
ax1.plot(x) | |
ax2 = plt.subplot2grid((2,3), (0,2)) | |
ax2.set_axis_bgcolor("#555555") | |
ax2.hist(x, orientation='horizontal', facecolor="#67A9CF") | |
### | |
### building the target covariance matrix Ω (weird way to build it!!!!) | |
# this array gives the elements of each row after the diagonal | |
# it doesn't make any sense now but we can think of the values as given by some | |
# function that depends on the i,j location on the matrix | |
ss = linspace(0.9, 0.1, n) | |
omega = diag([1]*n) # diagonal (identity matrix) | |
for i, s in enumerate(ss): | |
j = i+1 # to start on 1 and be more clear below | |
omega = omega + diag([s]*(n-j), k=j) + diag([s]*(n-j), k=-j) # off main diagonal | |
if n<5: | |
print 'omega = ' | |
print omega | |
# cholesky decomposition | |
A = cholesky(omega) | |
if n<5: | |
print 'A = ' | |
print A | |
y = dot(A,x) # they are supposed to have covariance matrix Ω? | |
# how do we check that it's true??? | |
# nice plot continued | |
ax1 = plt.subplot2grid((2,3), (1,0), colspan=2) | |
ax1.set_title('after some magic...') | |
ax1.plot(y) | |
ax2 = plt.subplot2grid((2,3), (1,2)) | |
ax2.set_axis_bgcolor("#555555") | |
ax2.hist(y, orientation='horizontal', facecolor="#67A9CF") | |
### | |
fig.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment