Last active
October 8, 2021 17:56
-
-
Save terjehaukaas/b58a47d8c19299b7eb59a826b3fd41f2 to your computer and use it in GitHub Desktop.
VariableInference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------------ | |
# The following Python code is implemented by Professor Terje Haukaas at | |
# the University of British Columbia in Vancouver, Canada. It is made | |
# freely available online at terje.civil.ubc.ca together with notes, | |
# examples, and additional Python code. Please be cautious when using | |
# this code; it may contain bugs and comes without warranty of any form. | |
# ------------------------------------------------------------------------ | |
# Import useful libraries | |
import numpy as np | |
from scipy.stats import norm, lognorm, uniform | |
import matplotlib.pyplot as plt | |
# Input data (realization of one random variable in each column) | |
data = np.array([(146517, 205.4), | |
(153074.5, 204.5), | |
(132614.9, 190.9), | |
(149219.6, 203.6), | |
(141182.6, 196.3), | |
(142407.4, 191.1), | |
(143403.5, 192.8), | |
(135465.1, 191.3), | |
(154740.3, 204.5), | |
(133172.4, 187.9), | |
(138539.4, 193.3), | |
(149498.5, 201.2), | |
(143589.6, 201.2), | |
(152134.4, 203.0), | |
(128544.3, 195.5), | |
(147239.6, 203.0), | |
(151755.2, 194.0), | |
(136670.6, 198.0), | |
(173146.0, 214.6), | |
(146297.5, 203.4), | |
(148197.5, 203.9), | |
(160348.1, 212.7), | |
(158164.2, 211.5), | |
(143375.3, 194.0), | |
(156667.0, 206.0), | |
(139503.7, 188.4), | |
(138317.7, 197.4), | |
(149375.6, 200.8), | |
(162411.9, 214.1), | |
(143337.1, 200.3), | |
(118184.1, 181.9), | |
(155273.8, 209.6), | |
(148961.0, 200.6), | |
(146463.4, 192.3), | |
(145491.6, 198.7), | |
(146639.7, 196.5), | |
(145159.5, 207.4), | |
(141255.9, 197.4), | |
(133833.3, 196.4), | |
(151665.2, 197.3), | |
(133124.5, 195.0), | |
(171142.9, 213.1), | |
(163021.5, 209.7), | |
(152944.9, 207.8), | |
(150891.1, 208.5), | |
(141719.6, 200.2), | |
(122068.6, 178.9), | |
(142461.7, 196.2), | |
(131519.7, 196.3), | |
(165271.3, 209.3)]) | |
# Get number of random variables (numRVs) and number of observations (N) | |
N = data.shape[0] | |
numRVs = len(data[0]) | |
# Initialize vectors and matrices | |
variableVector = np.zeros(N) | |
R = np.eye(numRVs) | |
means = [] | |
stdvs = [] | |
plotCounter = 0 | |
# Loop over the random variables, i.e., the columns of the data | |
for j in range(numRVs): | |
# Collect the realizations for this random variable | |
for i in range(N): | |
variableVector[i] = data[i, j] | |
# Sort the elements of the vector | |
variableVector.sort() | |
# Compute the sum, the sum squared, and record points for the cumulative frequency diagram | |
vectorSum = 0.0 | |
vectorSumSquared = 0.0 | |
xValues = [] | |
yValues = [] | |
for i in range(N): | |
value = variableVector[i] | |
vectorSum += value | |
vectorSumSquared += value * value | |
xValues.append(value) | |
yValues.append((i + 1) / (float (N))) | |
# Compute mean, variance, and standard deviation | |
mean = vectorSum / (float (N)) | |
means.append(mean) | |
variance = 1.0 / (float(N) - 1.0) * (vectorSumSquared - float(N) * mean*mean) | |
stdv = np.sqrt(variance) | |
stdvs.append(stdv) | |
# Print results to the screen | |
print('\n'"Variable %i: Mean=%.2f, Stdv=%.2f, cov=%.2f percent" % (j+1, mean, stdv, 100.0 * stdv / mean)) | |
# Get min and max realizations and lay out the x-axis | |
vectorMin = np.min(variableVector) | |
vectorMax = np.max(variableVector) | |
myStep = (vectorMax-vectorMin)/30.0 | |
x = np.arange(start=vectorMin, stop=vectorMax+0.01*myStep, step=myStep) | |
# Normal distribution | |
normalCDF = norm.cdf(x, mean, stdv) | |
# Lognormal distribution | |
meanY = np.log(mean) - 0.5 * np.log(1.0 + (stdv / mean) ** 2) | |
stdvY = np.sqrt(np.log((stdv / mean) ** 2 + 1.0)) | |
lognormalCDF = lognorm.cdf(x, stdvY, 0.0, np.exp(meanY)) | |
# Plot the cumulative frequency diagram and CDFs | |
plotCounter += 1 | |
plt.ion() | |
plt.figure(plotCounter) | |
plt.plot(xValues, yValues, 'k-', label='Data') | |
plt.plot(x, normalCDF, 'b-', label='Normal') | |
plt.plot(x, lognormalCDF, 'r-', label='Lognormal') | |
plt.title('Cumulative Curves') | |
plt.legend(loc='upper left') | |
plt.show() | |
print('\n'"Press any key to continue...") | |
plt.waitforbuttonpress() | |
# Compute correlation matrix | |
if (numRVs > 1): | |
# Initialize vectors and the correlation matrix | |
variableVectori = np.zeros(N) | |
variableVectorj = np.zeros(N) | |
R = np.eye(numRVs) | |
for i in range(numRVs): | |
# Pick up the i'th vector | |
for k in range(N): | |
variableVectori[k] = data[k, i] | |
for j in range(i): | |
# Pick up the j'th vector | |
for k in range(N): | |
variableVectorj[k] = data[k, j] | |
# Create scatter plot | |
plotCounter += 1 | |
plt.ion() | |
plt.figure(plotCounter) | |
plt.plot(variableVectori, variableVectorj, 'ko') | |
plt.title('Scatter Diagram') | |
plt.show() | |
print('\n'"Press any key to continue...") | |
plt.waitforbuttonpress() | |
# Compute the correlation coefficient | |
productSum = 0.0 | |
for k in range(N): | |
productSum += variableVectori[k] * variableVectorj[k] | |
value = 1.0 / (float(N) - 1.0) * (productSum - N * means[i] * means[j]) / (stdvs[i] * stdvs[j]) | |
R[i, j] = value | |
R[j, i] = value | |
print('\n'"The correlation matrix is:") | |
print(R) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment