ModelInference
# ------------------------------------------------------------------------
# The following Python code is implemented by Professor Terje Haukaas at
# the University of British Columbia in Vancouver, Canada. It is made
# freely available online at terje.civil.ubc.ca together with notes,
# examples, and additional Python code. Please be cautious when using
# this code; it may contain bugs and comes without warranty of any kind.
#
# In fact, this is a particularly quick-and-dirty implementation of
# Ordinary and Bayesian Linear Regression. One part of this code is a
# search for good explanatory functions. That isn't always meaningful,
# but it provides insight into the specific data given below, which
# was actually generated with a deterministic model with four input
# parameters. If you know structural analysis, can you guess which
# model generated the data?
# ------------------------------------------------------------------------

# Import useful libraries
import numpy as np
import matplotlib.pyplot as plt

# Input data (first column = response, second = intercept, the rest are regressors)
data = np.array([(6.8, 1.0, 1000.0, 2000.0, 9500.0, 41096604.2),
                 (22.1, 1.0, 1572.0, 2532.0, 11573.0, 33260953.5),
                 (59.8, 1.0, 1288.0, 2801.0, 13647.0, 11565502.7),
                 (5.9, 1.0, 1543.0, 1370.0, 14614.0, 15284895.2),
                 (20.5, 1.0, 1572.0, 1113.0, 9872.0, 3562069.3),
                 (30.1, 1.0, 877.0, 2323.0, 12614.0, 9653979.2),
                 (176.1, 1.0, 1345.0, 2980.0, 12950.0, 5202934.7),
                 (11.0, 1.0, 1233.0, 2301.0, 15342.0, 29747448.2),
                 (26.4, 1.0, 1675.0, 1503.0, 10812.0, 6640981.3),
                 (21.2, 1.0, 1777.0, 2117.0, 12609.0, 21041461.3),
                 (4.7, 1.0, 843.0, 1448.0, 8606.0, 21041461.3),
                 (0.9, 1.0, 1909.0, 736.0, 9514.0, 31034422.7),
                 (35.9, 1.0, 1971.0, 1301.0, 13521.0, 2980441.3),
                 (0.6, 1.0, 1122.0, 759.0, 14311.0, 17859214.7),
                 (0.8, 1.0, 1833.0, 854.0, 11561.0, 40574196.0),
                 (24.0, 1.0, 1943.0, 2382.0, 9712.0, 37532448.0),
                 (39.3, 1.0, 1225.0, 1588.0, 11686.0, 3562069.3),
                 (21.8, 1.0, 1332.0, 1866.0, 15565.0, 8504460.2),
                 (40.0, 1.0, 1354.0, 1792.0, 15418.0, 4214833.3),
                 (63.3, 1.0, 548.0, 2988.0, 8489.0, 9067078.7),
                 (4.1, 1.0, 1257.0, 790.0, 12152.0, 4100925.2),
                 (9.6, 1.0, 1280.0, 2098.0, 12282.0, 33260953.5),
                 (48.2, 1.0, 1456.0, 2464.0, 13828.0, 10902678.2),
                 (0.5, 1.0, 1178.0, 797.0, 15198.0, 25333333.3),
                 (9.8, 1.0, 940.0, 2154.0, 9893.0, 32357991.2),
                 (66.0, 1.0, 1342.0, 2703.0, 12034.0, 11120725.3),
                 (41.7, 1.0, 1125.0, 2976.0, 10736.0, 22064924.8),
                 (8.0, 1.0, 738.0, 2139.0, 15346.0, 19726762.7),
                 (5.9, 1.0, 1982.0, 1148.0, 13479.0, 12490321.3),
                 (1.8, 1.0, 1882.0, 955.0, 8676.0, 34180559.8),
                 (5.6, 1.0, 1533.0, 1055.0, 10570.0, 10058989.5),
                 (13.0, 1.0, 1331.0, 2302.0, 11716.0, 35591509.3),
                 (55.5, 1.0, 712.0, 2693.0, 8302.0, 10058989.5),
                 (29.7, 1.0, 1002.0, 2848.0, 8850.0, 29326500.0),
                 (32.4, 1.0, 1787.0, 2024.0, 8113.0, 18777513.2),
                 (9.1, 1.0, 1303.0, 1932.0, 8888.0, 38528833.3),
                 (18.6, 1.0, 690.0, 1686.0, 9824.0, 6037642.7),
                 (0.8, 1.0, 1339.0, 922.0, 13510.0, 31034422.7),
                 (4.3, 1.0, 1084.0, 1905.0, 15503.0, 37532448.0),
                 (100.9, 1.0, 1274.0, 2290.0, 8373.0, 6037642.7)])

# Extract the y-vector and the X-matrix
rows = data.shape[0]
columns = len(data[0])
y = data[:, 0]
X = data[:, 1:columns]

# Determine number of regressors (k) and observations (n)
n = rows
k = columns - 1
nu = n - k

# Print info about the data
print("\nThere are", n, "observations and", k, "regressors")
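
# For the data set above this gives n = 40, k = 5 (including the intercept
# column), and hence nu = n - k = 35 degrees of freedom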

# Compute the ordinary least squares estimate, i.e., the mean of the thetas
XX = np.transpose(X).dot(X)
XXinv = np.linalg.inv(XX)
XXinvX = XXinv.dot(np.transpose(X))
mean_theta = XXinvX.dot(y)
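
# Optional sanity check (a sketch added for illustration): the same estimate
# is obtained with numpy's least-squares solver, which avoids forming the
# explicit inverse of X'X and is typically more robust numerically
theta_check = np.linalg.lstsq(X, y, rcond=None)[0]
if not np.allclose(theta_check, mean_theta):
    print("WARNING: normal-equation and lstsq estimates disagree")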

# Plot predictions vs. observations
plt.ion()
predictions = []
for i in range(n):
    model = 0
    for j in range(k):
        model += mean_theta[j] * X[i, j]
    predictions.append(model)
plt.plot(y, predictions, 'ko')
plt.xlabel("Observations")
plt.ylabel("Predictions")
print("\nClick somewhere in the plot to continue...")
plt.waitforbuttonpress()

# Compute X*theta and the ordinary least squares error variance
Xtheta = X.dot(mean_theta)
y_minus_Xtheta = y - Xtheta
dot_product_y_minus_Xtheta = (np.transpose(y_minus_Xtheta)).dot(y_minus_Xtheta)
s_squared = dot_product_y_minus_Xtheta / nu

# Compute the covariance matrix for the model parameters
covarianceMatrix = s_squared * XXinv

# Collect the standard deviations from the covariance matrix
stdv_theta = []
for i in range(k):
    stdv_theta.append(np.sqrt(covarianceMatrix[i, i]))
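
# Optional cross-check (a sketch; assumes the statsmodels package is
# available): a standard OLS fit should reproduce these standard errors
try:
    import statsmodels.api as sm
    if not np.allclose(sm.OLS(y, X).fit().bse, stdv_theta):
        print("WARNING: statsmodels reports different standard errors")
except ImportError:
    pass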

# Collect the coefficients of variation in a vector (in percent!)
cov_theta = []
for i in range(k):
    cov_theta.append(abs(100.0 * stdv_theta[i] / mean_theta[i]))

# Place 1/stdvs on the diagonal of the D^-1 matrix and extract the correlation matrix
Dinv = np.eye(k)
for i in range(k):
    Dinv[i, i] = 1.0 / stdv_theta[i]
correlationMatrix = (Dinv.dot(covarianceMatrix)).dot(Dinv)
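
# For inspection, one can also display the full parameter correlation matrix
print("\nCorrelation matrix for the thetas:\n", np.round(correlationMatrix, 2))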

# Compute statistics for the epsilon variable
mean_sigma = np.sqrt(s_squared)
variance_sigma = s_squared / (2.0 * (nu - 4.0))
stdv_sigma = np.sqrt(variance_sigma)
cov_sigma = abs(stdv_sigma / mean_sigma)

# Print the results
print("\nMean of Sigma = %.2f   C.o.v. of Sigma = %.2f" % (mean_sigma, 100.0 * cov_sigma), "percent")
print("\nPosterior statistics for the model parameters (thetas):\n")
for i in range(k):
    print("   Mean(%i) = %9.3g   Cov(%i) = %.2f" % (i + 1, mean_theta[i], i + 1, cov_theta[i]), "percent")

# Loop over all possible explanatory functions of the form x1^m1 * x2^m2 * ...
if int(sum(data[:, 1])) != n:
    print("WARNING: The search algorithm assumes 1.0 in the intercept column of the data")
powerLimit = 3
numCombinations = (2 * powerLimit + 1) ** (k - 1)
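# With k-1 = 4 regressors and integer powers in [-3, 3], this enumerates
# (2*3+1)^4 = 7^4 = 2401 candidate explanatory functions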
mValues = np.full(k-1, -powerLimit)
print("\nResults from the search for explanatory functions, if any:")
for combinations in range(numCombinations):
    newX = np.ones((n, 2))
    myString = ""
    for j in range(k-1):

        # Record a string with the full explanatory function
        if mValues[j] != 0:
            if j != 0:
                myString += " "
            if mValues[j] < 0:
                myString += "/ "
            else:
                myString += "* "
            if mValues[j] == 1 or mValues[j] == -1:
                myString += ("x%i" % (j + 1))
            else:
                myString += ("x%i^%i" % (j + 1, abs(mValues[j])))

        # Calculate the value of this explanatory function for each observation
        for i in range(n):
            newX[i, 1] *= X[i, j+1] ** mValues[j]

    # Compute the ordinary least squares estimate, i.e., the mean of the thetas
    XX = np.transpose(newX).dot(newX)
    det = np.linalg.det(XX)
    if det > 1.0e-10:
        XXinv = np.linalg.inv(XX)
        XXinvX = XXinv.dot(np.transpose(newX))
        mean_theta = XXinvX.dot(y)

        # Mean and standard deviation of this explanatory function
        vectorSum = 0.0
        vectorSumSquared = 0.0
        for i in range(n):
            value = newX[i, 1]
            vectorSum += value
            vectorSumSquared += value * value
        meanExp = vectorSum / float(n)
        varianceExp = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanExp * meanExp)
        stdvExp = np.sqrt(varianceExp)

        # Proceed only if stdvExp is reasonable
        if stdvExp / meanExp > 1e-8:

            # Mean and standard deviation of y
            vectorSum = 0.0
            vectorSumSquared = 0.0
            for i in range(n):
                value = y[i]
                vectorSum += value
                vectorSumSquared += value * value
            meanY = vectorSum / float(n)
            varianceY = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanY * meanY)
            stdvY = np.sqrt(varianceY)

            # For now, do a simple check of correlation between y and the explanatory function
            productSum = 0.0
            for i in range(n):
                productSum += newX[i, 1] * y[i]
            correlation = 1.0 / (float(n) - 1.0) * (productSum - n * meanExp * meanY) / (stdvExp * stdvY)
            if correlation > 0.95:
                print("Correlation = %.2f for explanatory function %.3g" % (correlation, mean_theta[1]), myString)
    # Increment the m-values
    counter = 0
    for a in range(k-1):
        if mValues[counter] < powerLimit:
            mValues[counter] += 1
            break
        else:
            mValues[counter] = -powerLimit
            counter += 1
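
# The increment step above walks mValues through every vector in
# {-powerLimit, ..., powerLimit}^(k-1) like an odometer. For example, with
# k-1 = 2 and powerLimit = 1 the enumeration order is (-1,-1), (0,-1),
# (1,-1), (-1,0), (0,0), (1,0), (-1,1), (0,1), (1,1).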