Standalone GEKPLS to help pinpoint bug in QuasiMonteCarlo.jl
# The purpose of this gist is to help pinpoint a potential bug in
# QuasiMonteCarlo. This gist shows the working of the GEKPLS surrogate model
# without potential interference from other Surrogates dependencies.
# To run the gist and reproduce the results:
# Run this gist with QuasiMonteCarlo@0.2.16 => all tests pass (evidenced by 3 true prints)
# Then run this gist with QuasiMonteCarlo@0.2.19 => all tests fail in a fashion similar to
# the failing tests of Surrogates.jl - https://github.com/SciML/Surrogates.jl/pull/420
# Additional notes:
# - Ensure that no other packages that depend on QuasiMonteCarlo are in your package environment
# - QuasiMonteCarlo@0.2.17 and QuasiMonteCarlo@0.2.18 throw errors. More info on the cause can
#   be seen here - https://github.com/SciML/QuasiMonteCarlo.jl/releases/tag/v0.2.17
using LinearAlgebra
using Statistics
using Zygote
using QuasiMonteCarlo

abstract type AbstractSurrogate <: Function end

"""
    GEKPLS(x, y, x_matrix, y_matrix, grads, xlimits, delta_x, extra_points, n_comp, beta, gamma, theta,
           reduced_likelihood_function_value,
           X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std)
"""
mutable struct GEKPLS{T, X, Y} <: AbstractSurrogate
    x::X
    y::Y
    x_matrix::Matrix{T} #1
    y_matrix::Matrix{T} #2
    grads::Matrix{T} #3
    xl::Matrix{T} #xlimits #4
    delta::T #5
    extra_points::Int #6
    num_components::Int #7
    beta::Vector{T} #8
    gamma::Matrix{T} #9
    theta::Vector{T} #10
    reduced_likelihood_function_value::T #11
    X_offset::Matrix{T} #12
    X_scale::Matrix{T} #13
    X_after_std::Matrix{T} #14 - X after standardization
    pls_mean::Matrix{T} #15
    y_mean::T #16
    y_std::T #17
end

"""
    bounds_error(x, xl)

Return true if any input x value falls outside the lower/upper bounds in xl.
"""
function bounds_error(x, xl)
    num_x_rows = size(x, 1)
    num_dim = size(xl, 1)
    for i in 1:num_x_rows
        for j in 1:num_dim
            if (x[i, j] < xl[j, 1] || x[i, j] > xl[j, 2])
                return true
            end
        end
    end
    return false
end
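# A minimal usage sketch (hypothetical values): xl stacks lower bounds in
# column 1 and upper bounds in column 2, one row per dimension.
#   xl = [0.0 1.0; 0.0 1.0]          # 2-D box [0,1] x [0,1]
#   bounds_error([0.5 0.5], xl)      # false -> point inside the box
#   bounds_error([1.5 0.5], xl)      # true  -> first coordinate above its upper bound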
""" | |
GEKPLS(X, y, grads, n_comp, delta_x, lb, ub, extra_points, theta) | |
Constructor for GEKPLS Struct | |
- x_vec: vector of tuples with x values | |
- y_vec: vector of floats with outputs | |
- grads_vec: gradients associated with each of the X points | |
- n_comp: number of components | |
- lb: lower bounds | |
- ub: upper bounds | |
- delta_x: step size while doing Taylor approximation | |
- extra_points: number of points to consider | |
- theta: initial expected variance of PLS regression components | |
""" | |
function GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, lb, ub, extra_points, theta) | |
xlimits = hcat(lb, ub) | |
X = vector_of_tuples_to_matrix(x_vec) | |
y = reshape(y_vec, (size(X, 1), 1)) | |
grads = vector_of_tuples_to_matrix2(grads_vec) | |
#ensure that X values are within the upper and lower bounds | |
if bounds_error(X, xlimits) | |
println("X values outside bounds") | |
return | |
end | |
pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(X, y, n_comp, grads, delta_x, | |
xlimits, extra_points) | |
X_after_std, y_after_std, X_offset, y_mean, X_scale, y_std = standardization(X_after_PLS, | |
y_after_PLS) | |
D, ij = cross_distances(X_after_std) | |
pls_mean_reshaped = reshape(pls_mean, (size(X, 2), n_comp)) | |
d = componentwise_distance_PLS(D, "squar_exp", n_comp, pls_mean_reshaped) | |
nt, nd = size(X_after_PLS) | |
beta, gamma, reduced_likelihood_function_value = _reduced_likelihood_function(theta, | |
"squar_exp", | |
d, nt, ij, | |
y_after_std) | |
return GEKPLS(x_vec, y_vec, X, y, grads, xlimits, delta_x, extra_points, n_comp, beta, | |
gamma, theta, | |
reduced_likelihood_function_value, | |
X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std) | |
end | |
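# Input format sketch (hypothetical toy shapes; the tests at the bottom of this
# file show a full working call; f stands in for your objective function):
#   x_vec     = [(0.1, 0.2), (0.3, 0.4), ...]   # sample points as tuples
#   y_vec     = [1.0, 2.0, ...]                 # one scalar output per point
#   grads_vec = gradient.(f, x_vec)             # Zygote gradients, one per point
#   g = GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, lb, ub, extra_points, theta)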
""" | |
(g::GEKPLS)(X_test) | |
Take in a set of one or more points and provide predicted approximate outputs (predictor). | |
""" | |
function (g::GEKPLS)(x_vec) | |
# _check_dimension(g, x_vec) | |
X_test = prep_data_for_pred(x_vec) | |
n_eval, n_features_x = size(X_test) | |
X_cont = (X_test .- g.X_offset) ./ g.X_scale | |
dx = differences(X_cont, g.X_after_std) | |
pred_d = componentwise_distance_PLS(dx, "squar_exp", g.num_components, g.pls_mean) | |
nt = size(g.X_after_std, 1) | |
r = transpose(reshape(squar_exp(g.theta, pred_d), (nt, n_eval))) | |
f = ones(n_eval, 1) | |
y_ = (f * g.beta) + (r * g.gamma) | |
y = g.y_mean .+ g.y_std * y_ | |
return y[1] | |
end | |
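# Prediction usage sketch (hypothetical values): the functor accepts a single
# tuple; use broadcasting for many points, as the tests below do.
#   y_hat  = g((0.5, 7.5, 7.5))   # predict at one 3-D point
#   y_hats = g.(x_test)           # predict at each tuple in a vector of tuples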
""" | |
add_point!(g::GEKPLS, new_x, new_y, new_grads) | |
add a new point to the dataset. | |
""" | |
function add_point!(g::GEKPLS, x_tup, y_val, grad_tup) | |
new_x = prep_data_for_pred(x_tup) | |
new_grads = prep_data_for_pred(grad_tup) | |
if vec(new_x) in eachrow(g.x_matrix) | |
println("Adding a sample that already exists. Cannot build GEKPLS") | |
return | |
end | |
if bounds_error(new_x, g.xl) | |
println("x values outside bounds") | |
return | |
end | |
temp_y = copy(g.y) #without the copy here, we get error ("cannot resize array with shared data") | |
push!(g.x, x_tup) | |
push!(temp_y, y_val) | |
g.y = temp_y | |
g.x_matrix = vcat(g.x_matrix, new_x) | |
g.y_matrix = vcat(g.y_matrix, y_val) | |
g.grads = vcat(g.grads, new_grads) | |
pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(g.x_matrix, g.y_matrix, | |
g.num_components, | |
g.grads, g.delta, g.xl, | |
g.extra_points) | |
g.X_after_std, y_after_std, g.X_offset, g.y_mean, g.X_scale, g.y_std = standardization(X_after_PLS, | |
y_after_PLS) | |
D, ij = cross_distances(g.X_after_std) | |
g.pls_mean = reshape(pls_mean, (size(g.x_matrix, 2), g.num_components)) | |
d = componentwise_distance_PLS(D, "squar_exp", g.num_components, g.pls_mean) | |
nt, nd = size(X_after_PLS) | |
g.beta, g.gamma, g.reduced_likelihood_function_value = _reduced_likelihood_function(g.theta, | |
"squar_exp", | |
d, | |
nt, | |
ij, | |
y_after_std) | |
end | |
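# Usage sketch (hypothetical point): add one sample with its output and its
# Zygote-style gradient, then the surrogate is rebuilt in place.
#   pt = (0.5, 7.5, 7.5)
#   add_point!(g, pt, welded_beam(pt), gradient(welded_beam, pt))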
""" | |
_ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points) | |
Gradient-enhanced PLS-coefficients. | |
Parameters | |
---------- | |
- X: [n_obs,dim] - The input variables. | |
- y: [n_obs,ny] - The output variable | |
- n_comp: int - Number of principal components used. | |
- gradients: - The gradient values. Matrix size (n_obs,dim) | |
- delta_x: real - The step used in the First Order Taylor Approximation | |
- xlimits: [dim, 2]- The upper and lower var bounds. | |
- extra_points: int - The number of extra points per each training point. | |
Returns | |
------- | |
- Coeff_pls: [dim, n_comp] - The PLS-coefficients. | |
- X: Concatenation of XX: [extra_points*nt, dim] - Extra points added (when extra_points > 0) and X | |
- y: Concatenation of yy[extra_points*nt, 1]- Extra points added (when extra_points > 0) and y | |
""" | |
function _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points) | |
# this function is equivalent to a combination of | |
# https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/utils/kriging_utils.py#L1036 | |
# and https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/surrogate_models/GEKPLS.py#L48 | |
nt, dim = size(X) | |
XX = zeros(0, dim) | |
yy = zeros(0, size(y)[2]) | |
coeff_pls = zeros((dim, n_comp, nt)) | |
for i in 1:nt | |
if dim >= 3 | |
bb_vals = circshift(boxbehnken(dim, 1), 1) | |
else | |
bb_vals = [0.0 0.0; #center | |
1.0 0.0; #right | |
0.0 1.0; #up | |
-1.0 0.0; #left | |
0.0 -1.0; #down | |
1.0 1.0; #right up | |
-1.0 1.0; #left up | |
-1.0 -1.0; #left down | |
1.0 -1.0] | |
end | |
_X = zeros((size(bb_vals)[1], dim)) | |
_y = zeros((size(bb_vals)[1], 1)) | |
bb_vals = bb_vals .* (delta_x * (xlimits[:, 2] - xlimits[:, 1]))' #smt calls this sign. I've called it bb_vals | |
_X = X[i, :]' .+ bb_vals | |
bb_vals = bb_vals .* grads[i, :]' | |
_y = y[i, :] .+ sum(bb_vals, dims = 2) | |
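        # A note on the two steps above (as I read the SMT logic): each perturbed
        # sample is valued with a first-order Taylor approximation,
        #   y(x_i + dx) ≈ y(x_i) + ∇y(x_i) ⋅ dx,
        # where dx is bb_vals scaled by delta_x * (ub - lb) along each dimension.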
        #_pls.fit(_X, _y) # relic from sklearn version; retained for future reference.
        #coeff_pls[:, :, i] = _pls.x_rotations_ # relic from sklearn version; retained for future reference.
        coeff_pls[:, :, i] = _modified_pls(_X, _y, n_comp) #_modified_pls returns the equivalent of SKLearn's _pls.x_rotations_
        if extra_points != 0
            start_index = max(1, length(coeff_pls[:, 1, i]) - extra_points + 1)
            max_coeff = sortperm(broadcast(abs, coeff_pls[:, 1, i]))[start_index:end]
            for ii in max_coeff
                XX = [XX; transpose(X[i, :])]
                XX[end, ii] += delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
                yy = [yy; y[i]]
                yy[end] += grads[i, ii] * delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
            end
        end
    end
    if extra_points != 0
        X = [X; XX]
        y = [y; yy]
    end
    pls_mean = mean(broadcast(abs, coeff_pls), dims = 3)
    return pls_mean, X, y
end
######start of bbdesign######
#
# Adapted from 'ExperimentalDesign.jl: Design of Experiments in Julia'
# https://github.com/phrb/ExperimentalDesign.jl
# MIT License
# ExperimentalDesign.jl: Design of Experiments in Julia
# Copyright (C) 2019 Pedro Bruel <pedro.bruel@gmail.com>
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
function boxbehnken(matrix_size::Int)
    boxbehnken(matrix_size, matrix_size)
end

function boxbehnken(matrix_size::Int, center::Int)
    @assert matrix_size >= 3
    A_fact = explicit_fullfactorial(Tuple([-1, 1] for i in 1:2))
    rows = floor(Int, (0.5 * matrix_size * (matrix_size - 1)) * size(A_fact)[1])
    A = zeros(rows, matrix_size)
    l = 0
    for i in 1:(matrix_size - 1)
        for j in (i + 1):matrix_size
            l = l + 1
            A[(max(0, (l - 1) * size(A_fact)[1]) + 1):(l * size(A_fact)[1]), i] = A_fact[:, 1]
            A[(max(0, (l - 1) * size(A_fact)[1]) + 1):(l * size(A_fact)[1]), j] = A_fact[:, 2]
        end
    end
    if center == matrix_size
        if matrix_size <= 16
            points = [0, 0, 3, 3, 6, 6, 6, 8, 9, 10, 12, 12, 13, 14, 15, 16]
            center = points[matrix_size]
        end
    end
    A = transpose(hcat(transpose(A), transpose(zeros(center, matrix_size))))
end

function explicit_fullfactorial(factors::Tuple)
    explicit_fullfactorial(fullfactorial(factors))
end

function explicit_fullfactorial(iterator::Base.Iterators.ProductIterator)
    hcat(vcat.(collect(iterator)...)...)
end

function fullfactorial(factors::Tuple)
    Base.Iterators.product(factors...)
end
######end of bb design######
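# A quick shape check (values derived from the code above, not from the original
# package): a Box-Behnken design in 3 factors has 3*(3-1)/2 * 4 = 12 edge runs,
# plus the appended center rows, e.g.
#   size(boxbehnken(3, 1)) # -> (13, 3): 12 edge runs + 1 center row
#   size(boxbehnken(3))    # -> (15, 3): center count taken from the points table (3 centers)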
""" | |
We substract the mean from each variable. Then, we divide the values of each | |
variable by its standard deviation. | |
Parameters | |
---------- | |
X - The input variables. | |
y - The output variable. | |
Returns | |
------- | |
X: [n_obs, dim] | |
The standardized input matrix. | |
y: [n_obs, 1] | |
The standardized output vector. | |
X_offset: The mean (or the min if scale_X_to_unit=True) of each input variable. | |
y_mean: The mean of the output variable. | |
X_scale: The standard deviation of each input variable. | |
y_std: The standard deviation of the output variable. | |
""" | |
function standardization(X, y) | |
#Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L21 | |
X_offset = mean(X, dims = 1) | |
X_scale = std(X, dims = 1) | |
X_scale = map(x -> (x == 0.0) ? x = 1 : x = x, X_scale) #to prevent division by 0 below | |
y_mean = mean(y) | |
y_std = std(y) | |
y_std = map(y -> (y == 0) ? y = 1 : y = y, y_std) #to prevent division by 0 below | |
X = (X .- X_offset) ./ X_scale | |
y = (y .- y_mean) ./ y_std | |
return X, y, X_offset, y_mean, X_scale, y_std | |
end | |
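# Behavior sketch (hypothetical toy data): columns are shifted to zero mean and
# scaled to unit standard deviation; zero-variance columns get scale 1 so the
# division is a no-op.
#   Xs, ys, X_off, y_m, X_sc, y_sd = standardization([1.0 5.0; 3.0 5.0], [2.0; 4.0])
#   # Xs -> [-0.707 0.0; 0.707 0.0]   (second column is constant, so left at 0)
#   # ys -> [-0.707; 0.707]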
""" | |
Computes the nonzero componentwise cross-distances between the vectors | |
in X | |
Parameters | |
---------- | |
X: [n_obs, dim] | |
Returns | |
------- | |
D: [n_obs * (n_obs - 1) / 2, dim] | |
- The cross-distances between the vectors in X. | |
ij: [n_obs * (n_obs - 1) / 2, 2] | |
- The indices i and j of the vectors in X associated to the cross- | |
distances in D. | |
""" | |
function cross_distances(X) | |
# equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L86 | |
n_samples, n_features = size(X) | |
n_nonzero_cross_dist = (n_samples * (n_samples - 1)) ÷ 2 | |
ij = zeros((n_nonzero_cross_dist, 2)) | |
D = zeros((n_nonzero_cross_dist, n_features)) | |
ll_1 = 0 | |
for k in 1:(n_samples - 1) | |
ll_0 = ll_1 + 1 | |
ll_1 = ll_0 + n_samples - k - 1 | |
ij[ll_0:ll_1, 1] .= k | |
ij[ll_0:ll_1, 2] = (k + 1):1:n_samples | |
D[ll_0:ll_1, :] = -(X[(k + 1):n_samples, :] .- X[k, :]') | |
end | |
return D, Int.(ij) | |
end | |
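# Shape/content sketch (hypothetical points): for 3 points there are
# 3*(3-1)/2 = 3 pairs.
#   D, ij = cross_distances([0.0 0.0; 1.0 0.0; 0.0 2.0])
#   # ij -> [1 2; 1 3; 2 3]
#   # D  -> [-1.0 0.0; 0.0 -2.0; 1.0 -2.0]   (componentwise x_i - x_j for i < j)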
""" | |
Computes the nonzero componentwise cross-spatial-correlation-distance | |
between the vectors in X. | |
Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257 | |
with some simplifications (removed theta and return_derivative as it's not required for GEKPLS) | |
Parameters | |
---------- | |
D: [n_obs * (n_obs - 1) / 2, dim] | |
- The L1 cross-distances between the vectors in X. | |
corr: str | |
- Name of the correlation function used. | |
squar_exp or abs_exp. | |
n_comp: int | |
- Number of principal components used. | |
coeff_pls: [dim, n_comp] | |
- The PLS-coefficients. | |
Returns | |
------- | |
D_corr: [n_obs * (n_obs - 1) / 2, n_comp] | |
- The componentwise cross-spatial-correlation-distance between the | |
vectors in X. | |
""" | |
function componentwise_distance_PLS(D, corr, n_comp, coeff_pls) | |
#equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257 | |
#todo | |
#figure out how to handle this computation in the case of very large matrices | |
#similar to what SMT has done | |
#at https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257 | |
D_corr = zeros((size(D)[1], n_comp)) | |
if corr == "squar_exp" | |
D_corr = D .^ 2 * coeff_pls .^ 2 | |
else #abs_exp | |
D_corr = abs.(D) * abs.(coeff_pls) | |
end | |
return D_corr | |
end | |
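# Shape sketch: with D of size [n_pairs, dim] and coeff_pls of size [dim, n_comp],
# the "squar_exp" branch computes D_corr = (D .^ 2) * (coeff_pls .^ 2), i.e. the
# squared componentwise distances projected onto the n_comp PLS directions,
# giving a [n_pairs, n_comp] matrix.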
""" | |
Squared exponential correlation model. | |
Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L604 | |
Parameters: | |
----------- | |
theta : Hyperparameters of the correlation model | |
d: componentwise distances from componentwise_distance_PLS | |
Returns: | |
-------- | |
r: array containing the values of the autocorrelation model | |
""" | |
function squar_exp(theta, d) | |
n_components = size(d)[2] | |
theta = reshape(theta, (1, n_components)) | |
return exp.(-sum(theta .* d, dims = 2)) | |
end | |
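# Sketch: each row k of d yields r_k = exp(-sum_j theta_j * d_kj), e.g.
#   squar_exp([1.0, 2.0], [0.5 0.25])   # -> exp(-(1*0.5 + 2*0.25)) = exp(-1) ≈ 0.368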
""" | |
differences(X, Y) | |
return differences between two arrays | |
given an input like this: | |
X = [1.0 1.0 1.0; 2.0 2.0 2.0] | |
Y = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0] | |
diff = differences(X,Y) | |
We get an output (diff) that looks like this: | |
[ 0. -1. -2. | |
-3. -4. -5. | |
-6. -7. -8. | |
1. 0. -1. | |
-2. -3. -4. | |
-5. -6. -7.] | |
""" | |
function differences(X, Y) | |
#equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L392 | |
#code credit: Elias Carvalho - https://stackoverflow.com/questions/72392010/row-wise-operations-between-matrices-in-julia | |
Rx = repeat(X, inner = (size(Y, 1), 1)) | |
Ry = repeat(Y, size(X, 1)) | |
return Rx - Ry | |
end | |
""" | |
_reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0) | |
Compute the reduced likelihood function value and other coefficients necessary for prediction | |
This function is a loose translation of SMT code from | |
https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247 | |
It determines the BLUP parameters and evaluates the reduced likelihood function for the given theta. | |
Parameters | |
---------- | |
theta: array containing the parameters at which the Gaussian Process model parameters should be determined. | |
kernel_type: name of the correlation function. | |
d: The componentwise cross-spatial-correlation-distance between the vectors in X. | |
nt: number of training points | |
ij: The indices i and j of the vectors in X associated to the cross-distances in D. | |
y_norma: Standardized y values | |
noise: noise hyperparameter - increasing noise reduces reduced_likelihood_function_value | |
Returns | |
------- | |
reduced_likelihood_function_value: real | |
- The value of the reduced likelihood function associated with the given autocorrelation parameters theta. | |
beta: Generalized least-squares regression weights | |
gamma: Gaussian Process weights. | |
""" | |
function _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0) | |
#equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247 | |
reduced_likelihood_function_value = -Inf | |
nugget = 1000000.0 * eps() #a jitter for numerical stability; reducing the multiple from 1000000.0 results in positive definite error for Cholesky decomposition; | |
if kernel_type == "squar_exp" #todo - add other kernel type abs_exp etc. | |
r = squar_exp(theta, d) | |
end | |
R = (I + zeros(nt, nt)) .* (1.0 + nugget + noise) | |
for k in 1:size(ij)[1] | |
R[ij[k, 1], ij[k, 2]] = r[k] | |
R[ij[k, 2], ij[k, 1]] = r[k] | |
end | |
C = cholesky(R).L #todo - #values diverge at this point from SMT code; verify impact | |
F = ones(nt, 1) #todo - examine if this should be a parameter for this function | |
Ft = C \ F | |
Q, G = qr(Ft) | |
Q = Array(Q) | |
Yt = C \ y_norma | |
#todo - in smt, they check if the matrix is ill-conditioned using SVD. Verify and include if necessary | |
beta = G \ [(transpose(Q) ⋅ Yt)] | |
rho = Yt .- (Ft .* beta) | |
gamma = transpose(C) \ rho | |
sigma2 = sum((rho) .^ 2, dims = 1) / nt | |
detR = prod(diag(C) .^ (2.0 / nt)) | |
reduced_likelihood_function_value = -nt * log10(sum(sigma2)) - nt * log10(detR) | |
return beta, gamma, reduced_likelihood_function_value | |
end | |
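# Math sketch of the steps above (standard kriging/BLUP identities, as I read the
# SMT translation): R = C*C' is the regularized correlation matrix; Ft = C \ F and
# Yt = C \ y_norma are the whitened regressors and targets; beta solves the
# generalized least-squares problem via the QR factorization Ft = Q*G; the GP
# weights are gamma = C' \ (Yt - Ft*beta); and the reduced likelihood combines the
# process variance sigma2 = ||Yt - Ft*beta||^2 / nt with det(R).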
### MODIFIED PLS BELOW ###
# The code below is a simplified version of
# SKLearn's PLS
# https://github.com/scikit-learn/scikit-learn/blob/80598905e/sklearn/cross_decomposition/_pls.py
# It is completely self-contained (no dependencies)
function _center_scale(X, Y)
    x_mean = mean(X, dims = 1)
    X .-= x_mean
    y_mean = mean(Y, dims = 1)
    Y .-= y_mean
    x_std = std(X, dims = 1)
    x_std[x_std .== 0] .= 1.0
    X ./= x_std
    y_std = std(Y, dims = 1)
    y_std[y_std .== 0] .= 1.0
    Y ./= y_std
    return X, Y
end

function _svd_flip_1d(u, v)
    # equivalent of https://github.com/scikit-learn/scikit-learn/blob/80598905e517759b4696c74ecc35c6e2eb508cff/sklearn/cross_decomposition/_pls.py#L149
    biggest_abs_val_idx = findmax(abs.(vec(u)))[2]
    sign_ = sign(u[biggest_abs_val_idx])
    u .*= sign_
    v .*= sign_
end

function _get_first_singular_vectors_power_method(X, Y)
    my_eps = eps()
    y_score = vec(Y)
    x_weights = transpose(X) * y_score / dot(y_score, y_score)
    x_weights ./= (sqrt(dot(x_weights, x_weights)) + my_eps)
    x_score = X * x_weights
    y_weights = transpose(Y) * x_score / dot(x_score, x_score)
    y_score = Y * y_weights / (dot(y_weights, y_weights) + my_eps)
    #Equivalent in intent to https://github.com/scikit-learn/scikit-learn/blob/80598905e517759b4696c74ecc35c6e2eb508cff/sklearn/cross_decomposition/_pls.py#L66
    if any(isnan.(x_weights)) || any(isnan.(y_weights))
        return false, false
    end
    return x_weights, y_weights
end

function _modified_pls(X, Y, n_components)
    x_weights_ = zeros(size(X, 2), n_components)
    _x_scores = zeros(size(X, 1), n_components)
    x_loadings_ = zeros(size(X, 2), n_components)
    Xk, Yk = _center_scale(X, Y)
    for k in 1:n_components
        x_weights, y_weights = _get_first_singular_vectors_power_method(Xk, Yk)
        if x_weights == false
            break
        end
        _svd_flip_1d(x_weights, y_weights)
        x_scores = Xk * x_weights
        x_loadings = transpose(x_scores) * Xk / dot(x_scores, x_scores)
        Xk = Xk - (x_scores * x_loadings)
        y_loadings = transpose(x_scores) * Yk / dot(x_scores, x_scores)
        Yk = Yk - x_scores * y_loadings
        x_weights_[:, k] = x_weights
        _x_scores[:, k] = x_scores
        x_loadings_[:, k] = vec(x_loadings)
    end
    x_rotations_ = x_weights_ * pinv(transpose(x_loadings_) * x_weights_)
    return x_rotations_
end
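# Usage sketch: for X of size [n_obs, dim] and Y of size [n_obs, 1],
#   rot = _modified_pls(X, Y, n_comp)   # -> [dim, n_comp] rotation matrix,
# the analogue of sklearn's _pls.x_rotations_. Note that _center_scale mutates
# its arguments in place, so pass copies if X and Y will be reused.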
### MODIFIED PLS ABOVE ###

### BELOW ARE HELPER FUNCTIONS THAT CONVERT VECTORS OF TUPLES INTO MATRICES
function vector_of_tuples_to_matrix(v)
    num_rows = length(v)
    num_cols = length(first(v))
    K = zeros(num_rows, num_cols)
    for row in 1:num_rows
        for col in 1:num_cols
            K[row, col] = v[row][col]
        end
    end
    return K
end

function vector_of_tuples_to_matrix2(v)
    #convert gradients into matrix form
    num_rows = length(v)
    num_cols = length(first(first(v)))
    K = zeros(num_rows, num_cols)
    for row in 1:num_rows
        for col in 1:num_cols
            K[row, col] = v[row][1][col]
        end
    end
    return K
end

function prep_data_for_pred(v)
    l = length(first(v))
    if (l == 1)
        return [tup[k] for k in 1:1, tup in v]
    end
    return [tup[k] for tup in v, k in 1:l]
end
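# Conversion sketch for the three helpers (hypothetical toy inputs):
#   vector_of_tuples_to_matrix([(1.0, 2.0), (3.0, 4.0)])        # -> [1.0 2.0; 3.0 4.0]
#   vector_of_tuples_to_matrix2([((1.0, 2.0),), ((3.0, 4.0),)]) # unwraps Zygote-style 1-tuples
#   prep_data_for_pred((1.0, 2.0))                              # -> 1x2 Matrix [1.0 2.0]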
############################################################
#### TESTS ARE BELOW ####

## welded beam tests
function welded_beam(x)
    h = x[1]
    l = x[2]
    t = x[3]
    a = 6000 / (sqrt(2) * h * l)
    b = (6000 * (14 + 0.5 * l) * sqrt(0.25 * (l^2 + (h + t)^2))) /
        (2 * (0.707 * h * l * (l^2 / 12 + 0.25 * (h + t)^2)))
    return (sqrt(a^2 + b^2 + l * a * b)) / (sqrt(0.25 * (l^2 + (h + t)^2)))
end

n = 1000
lb = [0.125, 5.0, 5.0]
ub = [1.0, 10.0, 10.0]
x_pre = transpose(QuasiMonteCarlo.sample(n, lb, ub, SobolSample()))
x = [Tuple(r) for r in eachrow(x_pre)]
grads = gradient.(welded_beam, x)
y = welded_beam.(x)
n_test = 100
x_test_pre = transpose(QuasiMonteCarlo.sample(n_test, lb, ub, GoldenSample()))
x_test = [Tuple(r) for r in eachrow(x_test_pre)]
y_true = welded_beam.(x_test)

n_comp = 3
delta_x = 0.0001
extra_points = 2
initial_theta = [0.01 for i in 1:n_comp]
g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
y_pred = g.(x_test)
rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
println(isapprox(rmse, 39.0, atol = 0.5)) #rmse: 38.988
#println(rmse)

n_comp = 2
delta_x = 0.0001
extra_points = 2
initial_theta = [0.01 for i in 1:n_comp]
g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
y_pred = g.(x_test)
rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
println(isapprox(rmse, 39.5, atol = 0.5)) #rmse: 39.481
#println(rmse)

n_comp = 2
delta_x = 0.0001
extra_points = 4
initial_theta = [0.01 for i in 1:n_comp]
g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
y_pred = g.(x_test)
rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
println(isapprox(rmse, 37.5, atol = 0.5)) #rmse: 37.87
#println(rmse)
println("all done")