This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Indices that sort y_var in ascending order along axis 0, flattened to 1-D.
# NOTE(review): y_var is presumably the predictive variance, so the first
# entries would be the most confident predictions — confirm against the caller.
ranked_confidence_list = np.argsort(y_var, axis=0).flatten()
print(ranked_confidence_list)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Output Standardised RMSE and RMSE on Train Set
# predict_f returns a pair; only the first element (the predictions) is used here.
y_pred_train, _ = m.predict_f(X_train)
# RMSE computed directly on the values the model was trained on
train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
# RMSE after mapping both targets and predictions back through y_scaler
train_rmse = np.sqrt(
    mean_squared_error(
        y_scaler.inverse_transform(y_train),
        y_scaler.inverse_transform(y_pred_train),
    )
)
# NOTE(review): the standardised RMSE is expressed in scaled target units, so
# the "nm" suffix on the first message looks inaccurate — confirm before
# changing the output format.
print("\nTrain RMSE (Standardised): {:.3f} nm".format(train_rmse_stan))
print("Train RMSE: {:.3f} nm".format(train_rmse))
# Output R^2, RMSE and MAE on the test set |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split into train and test sets; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_set_size, random_state=0)

# Reshape the targets into column vectors of shape (n, 1)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# We standardise the outputs but leave the inputs unchanged
# (the scaled inputs returned by transform_data are discarded).
_, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test)

# Cast the training inputs to float64
X_train = X_train.astype(np.float64)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Value passed as `test_size` to train_test_split (a float is the held-out fraction)
test_set_size = 0.2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def transform_data(X_train, y_train, X_test, y_test): | |
""" | |
Apply feature scaling to the data. Return the standardised train and | |
test sets together with the scaler object for the target values. | |
:param X_train: input train data | |
:param y_train: train labels | |
:param X_test: input test data | |
:param y_test: test labels | |
:return: X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled, y_scaler |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We define the Gaussian Process Regression Model using the Tanimoto kernel
# `m` is a module-level placeholder: objective_closure looks it up at call
# time, so it must be rebound to a model before the closure is evaluated.
m = None


def objective_closure():
    """Return the negative log marginal likelihood of the module-level model `m`."""
    return -m.log_marginal_likelihood()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse every SMILES string into an RDKit molecule object
rdkit_mols = [MolFromSmiles(smiles) for smiles in smiles_list]
# One Morgan fingerprint bit vector per molecule (radius 3, 2048 bits)
X = [AllChem.GetMorganFingerprintAsBitVect(mol, radius=3, nBits=2048) for mol in rdkit_mols]
# Convert the list of bit vectors into a NumPy array
X = np.asarray(X)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the photoswitch dataset using pandas
df = pd.read_csv('../dataset/photoswitches.csv')

# Create a list of molecule SMILES and the associated property values
smiles_list = df['SMILES'].to_list()
property_vals = df['E isomer pi-pi* wavelength in nm'].to_numpy()

# Delete NaN values. The mask is computed once and applied to both sequences
# so that each remaining SMILES string stays aligned with its target value.
is_missing = np.isnan(property_vals)
smiles_list = [smiles for smiles, missing in zip(smiles_list, is_missing) if not missing]
y = property_vals[~is_missing]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Tanimoto(gpflow.kernels.Kernel): | |
def __init__(self): | |
super().__init__() | |
# We constrain the value of the kernel variance to be positive when it's being optimised | |
self.variance = gpflow.Parameter(1.0, transform=positive()) | |
def K(self, X, X2=None): | |
""" | |
Compute the Tanimoto kernel matrix σ² * ((<x, y>) / (||x||^2 + ||y||^2 - <x, y>)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gpflow | |
from gpflow.mean_functions import Constant | |
from gpflow.utilities import positive, print_summary | |
from gpflow.utilities.ops import broadcasting_elementwise | |
from matplotlib import pyplot as plt | |
import numpy as np | |
import pandas as pd | |
from rdkit.Chem import AllChem, Descriptors, MolFromSmiles | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error |