ModelInference
# ------------------------------------------------------------------------
# The following Python code is implemented by Professor Terje Haukaas at
# the University of British Columbia in Vancouver, Canada. It is made
# freely available online at terje.civil.ubc.ca together with notes,
# examples, and additional Python code. Please be cautious when using
# this code; it may contain bugs and comes without warranty of any kind.
#
# In fact, this is a particularly quick-and-dirty implementation of
# Ordinary and Bayesian Linear Regression. One part of this code is a
# search for good explanatory functions. That isn't always meaningful,
# but it provides insight into the specific data given below, which
# was actually generated with a deterministic model with four input
# parameters. If you know structural analysis, can you guess which
# model generated the data?
# ------------------------------------------------------------------------

# Import useful libraries
import numpy as np
import matplotlib.pyplot as plt

# Input data (first column = response, second = intercept, the rest are regressors)
data = np.array([(6.8, 1.0, 1000.0, 2000.0, 9500.0, 41096604.2),
                 (22.1, 1.0, 1572.0, 2532.0, 11573.0, 33260953.5),
                 (59.8, 1.0, 1288.0, 2801.0, 13647.0, 11565502.7),
                 (5.9, 1.0, 1543.0, 1370.0, 14614.0, 15284895.2),
                 (20.5, 1.0, 1572.0, 1113.0, 9872.0, 3562069.3),
                 (30.1, 1.0, 877.0, 2323.0, 12614.0, 9653979.2),
                 (176.1, 1.0, 1345.0, 2980.0, 12950.0, 5202934.7),
                 (11.0, 1.0, 1233.0, 2301.0, 15342.0, 29747448.2),
                 (26.4, 1.0, 1675.0, 1503.0, 10812.0, 6640981.3),
                 (21.2, 1.0, 1777.0, 2117.0, 12609.0, 21041461.3),
                 (4.7, 1.0, 843.0, 1448.0, 8606.0, 21041461.3),
                 (0.9, 1.0, 1909.0, 736.0, 9514.0, 31034422.7),
                 (35.9, 1.0, 1971.0, 1301.0, 13521.0, 2980441.3),
                 (0.6, 1.0, 1122.0, 759.0, 14311.0, 17859214.7),
                 (0.8, 1.0, 1833.0, 854.0, 11561.0, 40574196.0),
                 (24.0, 1.0, 1943.0, 2382.0, 9712.0, 37532448.0),
                 (39.3, 1.0, 1225.0, 1588.0, 11686.0, 3562069.3),
                 (21.8, 1.0, 1332.0, 1866.0, 15565.0, 8504460.2),
                 (40.0, 1.0, 1354.0, 1792.0, 15418.0, 4214833.3),
                 (63.3, 1.0, 548.0, 2988.0, 8489.0, 9067078.7),
                 (4.1, 1.0, 1257.0, 790.0, 12152.0, 4100925.2),
                 (9.6, 1.0, 1280.0, 2098.0, 12282.0, 33260953.5),
                 (48.2, 1.0, 1456.0, 2464.0, 13828.0, 10902678.2),
                 (0.5, 1.0, 1178.0, 797.0, 15198.0, 25333333.3),
                 (9.8, 1.0, 940.0, 2154.0, 9893.0, 32357991.2),
                 (66.0, 1.0, 1342.0, 2703.0, 12034.0, 11120725.3),
                 (41.7, 1.0, 1125.0, 2976.0, 10736.0, 22064924.8),
                 (8.0, 1.0, 738.0, 2139.0, 15346.0, 19726762.7),
                 (5.9, 1.0, 1982.0, 1148.0, 13479.0, 12490321.3),
                 (1.8, 1.0, 1882.0, 955.0, 8676.0, 34180559.8),
                 (5.6, 1.0, 1533.0, 1055.0, 10570.0, 10058989.5),
                 (13.0, 1.0, 1331.0, 2302.0, 11716.0, 35591509.3),
                 (55.5, 1.0, 712.0, 2693.0, 8302.0, 10058989.5),
                 (29.7, 1.0, 1002.0, 2848.0, 8850.0, 29326500.0),
                 (32.4, 1.0, 1787.0, 2024.0, 8113.0, 18777513.2),
                 (9.1, 1.0, 1303.0, 1932.0, 8888.0, 38528833.3),
                 (18.6, 1.0, 690.0, 1686.0, 9824.0, 6037642.7),
                 (0.8, 1.0, 1339.0, 922.0, 13510.0, 31034422.7),
                 (4.3, 1.0, 1084.0, 1905.0, 15503.0, 37532448.0),
                 (100.9, 1.0, 1274.0, 2290.0, 8373.0, 6037642.7)])

# Extract the y-vector and the X-matrix
rows = data.shape[0]
columns = len(data[0])
y = data[:, 0]
X = data[:, 1:columns]

# Determine number of regressors (k) and observations (n)
n = rows
k = columns - 1
nu = n - k

# Print info about the data
print("\nThere are", n, "observations and", k, "regressors")
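
# For the data set above this gives n = 40, k = 5 (including the intercept
# column), and hence nu = n - k = 35 degrees of freedom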

# Compute the ordinary least squares estimate, i.e., the mean of the thetas
XX = np.transpose(X).dot(X)
XXinv = np.linalg.inv(XX)
XXinvX = XXinv.dot(np.transpose(X))
mean_theta = XXinvX.dot(y)
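
# Optional sanity check (a sketch added for illustration): the same estimate
# is obtained with numpy's least-squares solver, which avoids forming the
# explicit inverse of X'X and is typically more robust numerically
theta_check = np.linalg.lstsq(X, y, rcond=None)[0]
if not np.allclose(theta_check, mean_theta):
    print("WARNING: normal-equation and lstsq estimates disagree")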

# Plot predictions vs. observations
plt.ion()
predictions = []
for i in range(n):
    model = 0
    for j in range(k):
        model += mean_theta[j] * X[i, j]
    predictions.append(model)
plt.plot(y, predictions, 'ko')
plt.xlabel("Observations")
plt.ylabel("Predictions")
print("\nClick somewhere in the plot to continue...")
plt.waitforbuttonpress()

# Compute X*theta and the ordinary least squares error variance
Xtheta = X.dot(mean_theta)
y_minus_Xtheta = y - Xtheta
dot_product_y_minus_Xtheta = (np.transpose(y_minus_Xtheta)).dot(y_minus_Xtheta)
s_squared = dot_product_y_minus_Xtheta / nu

# Compute the covariance matrix for the model parameters
covarianceMatrix = s_squared * XXinv

# Collect the standard deviations from the covariance matrix
stdv_theta = []
for i in range(k):
    stdv_theta.append(np.sqrt(covarianceMatrix[i, i]))
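
# Optional cross-check (a sketch; assumes the statsmodels package is
# available): a standard OLS fit should reproduce these standard errors
try:
    import statsmodels.api as sm
    if not np.allclose(sm.OLS(y, X).fit().bse, stdv_theta):
        print("WARNING: statsmodels reports different standard errors")
except ImportError:
    pass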

# Collect the coefficients of variation in a vector (in percent!)
cov_theta = []
for i in range(k):
    cov_theta.append(abs(100.0 * stdv_theta[i] / mean_theta[i]))

# Place 1/stdvs on the diagonal of the D^-1 matrix and extract the correlation matrix
Dinv = np.eye(k)
for i in range(k):
    Dinv[i, i] = 1.0 / stdv_theta[i]
correlationMatrix = (Dinv.dot(covarianceMatrix)).dot(Dinv)
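
# For inspection, one can also display the full parameter correlation matrix
print("\nCorrelation matrix for the thetas:\n", np.round(correlationMatrix, 2))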

# Compute statistics for the epsilon variable
mean_sigma = np.sqrt(s_squared)
variance_sigma = s_squared / (2.0 * (nu - 4.0))
stdv_sigma = np.sqrt(variance_sigma)
cov_sigma = abs(stdv_sigma / mean_sigma)

# Print the results
print("\nMean of Sigma = %.2f   C.o.v. of Sigma = %.2f" % (mean_sigma, 100.0 * cov_sigma), "percent")
print("\nPosterior statistics for the model parameters (thetas):\n")
for i in range(k):
    print("   Mean(%i) = %9.3g   Cov(%i) = %.2f" % (i + 1, mean_theta[i], i + 1, cov_theta[i]), "percent")

# Loop over all possible explanatory functions of the form x1^m1 * x2^m2 * ...
if int(sum(data[:, 1])) != n:
    print("WARNING: The search algorithm assumes 1.0 in the intercept column of the data")
powerLimit = 3
numCombinations = (2 * powerLimit + 1) ** (k - 1)
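# With k-1 = 4 regressors and integer powers in [-3, 3], this enumerates
# (2*3+1)^4 = 7^4 = 2401 candidate explanatory functions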
mValues = np.full(k-1, -powerLimit)
print("\nResults from the search for explanatory functions, if any:")
for combinations in range(numCombinations):
    newX = np.ones((n, 2))
    myString = ""
    for j in range(k-1):

        # Record a string with the full explanatory function
        if mValues[j] != 0:
            if j != 0:
                myString += " "
            if mValues[j] < 0:
                myString += "/ "
            else:
                myString += "* "
            if mValues[j] == 1 or mValues[j] == -1:
                myString += ("x%i" % (j + 1))
            else:
                myString += ("x%i^%i" % (j + 1, abs(mValues[j])))

        # Calculate the value of this explanatory function for each observation
        for i in range(n):
            newX[i, 1] *= X[i, j+1] ** mValues[j]

    # Compute the ordinary least squares estimate, i.e., the mean of the thetas
    XX = np.transpose(newX).dot(newX)
    det = np.linalg.det(XX)
    if det > 1.0e-10:
        XXinv = np.linalg.inv(XX)
        XXinvX = XXinv.dot(np.transpose(newX))
        mean_theta = XXinvX.dot(y)

        # Mean and standard deviation of this explanatory function
        vectorSum = 0.0
        vectorSumSquared = 0.0
        for i in range(n):
            value = newX[i, 1]
            vectorSum += value
            vectorSumSquared += value * value
        meanExp = vectorSum / float(n)
        varianceExp = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanExp * meanExp)
        stdvExp = np.sqrt(varianceExp)

        # Proceed only if stdvExp is reasonable
        if stdvExp / meanExp > 1e-8:

            # Mean and standard deviation of y
            vectorSum = 0.0
            vectorSumSquared = 0.0
            for i in range(n):
                value = y[i]
                vectorSum += value
                vectorSumSquared += value * value
            meanY = vectorSum / float(n)
            varianceY = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanY * meanY)
            stdvY = np.sqrt(varianceY)

            # For now, do a simple check of correlation between y and the explanatory function
            productSum = 0.0
            for i in range(n):
                productSum += newX[i, 1] * y[i]
            correlation = 1.0 / (float(n) - 1.0) * (productSum - n * meanExp * meanY) / (stdvExp * stdvY)
            if correlation > 0.95:
                print("Correlation = %.2f for explanatory function %.3g" % (correlation, mean_theta[1]), myString)
    # Increment the m-values
    counter = 0
    for a in range(k-1):
        if mValues[counter] < powerLimit:
            mValues[counter] += 1
            break
        else:
            mValues[counter] = -powerLimit
            counter += 1
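
# The increment step above walks mValues through every vector in
# {-powerLimit, ..., powerLimit}^(k-1) like an odometer. For example, with
# k-1 = 2 and powerLimit = 1 the enumeration order is (-1,-1), (0,-1),
# (1,-1), (-1,0), (0,0), (1,0), (-1,1), (0,1), (1,1).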