Skip to content

Instantly share code, notes, and snippets.

@terjehaukaas
Last active October 8, 2021 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save terjehaukaas/b58a47d8c19299b7eb59a826b3fd41f2 to your computer and use it in GitHub Desktop.
Save terjehaukaas/b58a47d8c19299b7eb59a826b3fd41f2 to your computer and use it in GitHub Desktop.
VariableInference
# ------------------------------------------------------------------------
# The following Python code is implemented by Professor Terje Haukaas at
# the University of British Columbia in Vancouver, Canada. It is made
# freely available online at terje.civil.ubc.ca together with notes,
# examples, and additional Python code. Please be cautious when using
# this code; it may contain bugs and comes without warranty of any form.
# ------------------------------------------------------------------------
# Import useful libraries
import numpy as np
from scipy.stats import norm, lognorm, uniform
import matplotlib.pyplot as plt
# Input data (realization of one random variable in each column)
data = np.array([(146517, 205.4),
(153074.5, 204.5),
(132614.9, 190.9),
(149219.6, 203.6),
(141182.6, 196.3),
(142407.4, 191.1),
(143403.5, 192.8),
(135465.1, 191.3),
(154740.3, 204.5),
(133172.4, 187.9),
(138539.4, 193.3),
(149498.5, 201.2),
(143589.6, 201.2),
(152134.4, 203.0),
(128544.3, 195.5),
(147239.6, 203.0),
(151755.2, 194.0),
(136670.6, 198.0),
(173146.0, 214.6),
(146297.5, 203.4),
(148197.5, 203.9),
(160348.1, 212.7),
(158164.2, 211.5),
(143375.3, 194.0),
(156667.0, 206.0),
(139503.7, 188.4),
(138317.7, 197.4),
(149375.6, 200.8),
(162411.9, 214.1),
(143337.1, 200.3),
(118184.1, 181.9),
(155273.8, 209.6),
(148961.0, 200.6),
(146463.4, 192.3),
(145491.6, 198.7),
(146639.7, 196.5),
(145159.5, 207.4),
(141255.9, 197.4),
(133833.3, 196.4),
(151665.2, 197.3),
(133124.5, 195.0),
(171142.9, 213.1),
(163021.5, 209.7),
(152944.9, 207.8),
(150891.1, 208.5),
(141719.6, 200.2),
(122068.6, 178.9),
(142461.7, 196.2),
(131519.7, 196.3),
(165271.3, 209.3)])
# Get number of random variables (numRVs) and number of observations (N)
N = data.shape[0]
numRVs = len(data[0])
# Initialize vectors and matrices
variableVector = np.zeros(N)
R = np.eye(numRVs)
means = []
stdvs = []
plotCounter = 0
# Loop over the random variables, i.e., the columns of the data
for j in range(numRVs):
# Collect the realizations for this random variable
for i in range(N):
variableVector[i] = data[i, j]
# Sort the elements of the vector
variableVector.sort()
# Compute the sum, the sum squared, and record points for the cumulative frequency diagram
vectorSum = 0.0
vectorSumSquared = 0.0
xValues = []
yValues = []
for i in range(N):
value = variableVector[i]
vectorSum += value
vectorSumSquared += value * value
xValues.append(value)
yValues.append((i + 1) / (float (N)))
# Compute mean, variance, and standard deviation
mean = vectorSum / (float (N))
means.append(mean)
variance = 1.0 / (float(N) - 1.0) * (vectorSumSquared - float(N) * mean*mean)
stdv = np.sqrt(variance)
stdvs.append(stdv)
# Print results to the screen
print('\n'"Variable %i: Mean=%.2f, Stdv=%.2f, cov=%.2f percent" % (j+1, mean, stdv, 100.0 * stdv / mean))
# Get min and max realizations and lay out the x-axis
vectorMin = np.min(variableVector)
vectorMax = np.max(variableVector)
myStep = (vectorMax-vectorMin)/30.0
x = np.arange(start=vectorMin, stop=vectorMax+0.01*myStep, step=myStep)
# Normal distribution
normalCDF = norm.cdf(x, mean, stdv)
# Lognormal distribution
meanY = np.log(mean) - 0.5 * np.log(1.0 + (stdv / mean) ** 2)
stdvY = np.sqrt(np.log((stdv / mean) ** 2 + 1.0))
lognormalCDF = lognorm.cdf(x, stdvY, 0.0, np.exp(meanY))
# Plot the cumulative frequency diagram and CDFs
plotCounter += 1
plt.ion()
plt.figure(plotCounter)
plt.plot(xValues, yValues, 'k-', label='Data')
plt.plot(x, normalCDF, 'b-', label='Normal')
plt.plot(x, lognormalCDF, 'r-', label='Lognormal')
plt.title('Cumulative Curves')
plt.legend(loc='upper left')
plt.show()
print('\n'"Press any key to continue...")
plt.waitforbuttonpress()
# Compute correlation matrix
if (numRVs > 1):
# Initialize vectors and the correlation matrix
variableVectori = np.zeros(N)
variableVectorj = np.zeros(N)
R = np.eye(numRVs)
for i in range(numRVs):
# Pick up the i'th vector
for k in range(N):
variableVectori[k] = data[k, i]
for j in range(i):
# Pick up the j'th vector
for k in range(N):
variableVectorj[k] = data[k, j]
# Create scatter plot
plotCounter += 1
plt.ion()
plt.figure(plotCounter)
plt.plot(variableVectori, variableVectorj, 'ko')
plt.title('Scatter Diagram')
plt.show()
print('\n'"Press any key to continue...")
plt.waitforbuttonpress()
# Compute the correlation coefficient
productSum = 0.0
for k in range(N):
productSum += variableVectori[k] * variableVectorj[k]
value = 1.0 / (float(N) - 1.0) * (productSum - N * means[i] * means[j]) / (stdvs[i] * stdvs[j])
R[i, j] = value
R[j, i] = value
print('\n'"The correlation matrix is:")
print(R)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment