Skip to content

Instantly share code, notes, and snippets.

@larsbratholm
Last active November 8, 2019 14:59
Show Gist options
  • Save larsbratholm/b825126c632fc73fee0360f7c2cbb2b6 to your computer and use it in GitHub Desktop.
Save larsbratholm/b825126c632fc73fee0360f7c2cbb2b6 to your computer and use it in GitHub Desktop.
nmr interpolation
import sklearn.neighbors
import sklearn.ensemble
import itertools
def create_lookup_table(angles, cs, lookup_table_filename, granularity_degree=20):
"""
Creates lookup tables for procs15
"""
# convert everything to numpy arrays
angles = np.asarray(angles)
cs = np.asarray(cs)
# Create the empty lookup table
lookup_table = np.zeros((3,) + (360//granularity_degree+1,) * angles.shape[1])
# Create the data to do predictions on (just all the angles that go into the array)
# I haven't tested this fully, but seem to work
test_data = np.zeros(((360//granularity_degree+1) ** angles.shape[1], angles.shape[1]))
for i, idx in enumerate(itertools.product(list(range(360//granularity_degree+1)), repeat=angles.shape[1])):
idx = np.asarray(idx) * granularity_degree - 180
test_data[i] = idx
# Add cos/sin to features
angles_trig = np.zeros((angles.shape[0],angles.shape[1]*3))
test_data_trig = np.zeros((test_data.shape[0],test_data.shape[1]*3))
for i in range(angles.shape[1]):
angles_trig[:,i*3] = np.cos(angles[:,i])
angles_trig[:,i*3+1] = np.sin(angles[:,i])
angles_trig[:,i*3+2] = angles[:,i]
test_data_trig[:,i*3] = np.cos(test_data[:,i])
test_data_trig[:,i*3+1] = np.sin(test_data[:,i])
test_data_trig[:,i*3+2] = test_data[:,i]
# Make predictions for all the atoms (previous, central, next)
for ang in range(3):
y = cs[:,ang]
# KNeighboursRegressor if ExtraTrees takes too much memory.
# Make sure the cross validation is automated
#estimator = sklearn.neighbors.KNeighborsRegressor()
#param_grid = {'n_neighbors': [1,2,3,4,5,6,7,8,9,10], 'weights': ['uniform', 'distance'], 'p': [1,2]}
#mod = sklearn.model_selection.GridSearchCV(estimator, param_grid=param_grid, scoring='neg_mean_squared_error',
# cv=sklearn.model_selection.KFold(n_splits=10, shuffle=True))
# ExtraTrees Regressor. Increase min_samples_leaf or decrease n_estimators if having memory issues.
# Or use the KNeighboursRegressor instead.
estimator = sklearn.ensemble.ExtraTreesRegressor(n_estimators=50, min_samples_leaf=1)
param_grid = {}
mod = sklearn.model_selection.GridSearchCV(estimator, param_grid=param_grid, scoring='neg_mean_squared_error',
cv=sklearn.model_selection.KFold(n_splits=3, shuffle=True))
# Fit the model
mod.fit(angles_trig, y)
#print("Best params", mod.best_params_)
print("Best score for angle %d (should ideally be smaller than 0.4 for N, 0.1 for C and 0.02 for H)" % ang, mod.best_score_)
# Make predictions on the test data
predictions = mod.predict(test_data_trig)
# store predictions in lookup table
lookup_table[ang] = predictions.reshape(lookup_table[ang].shape)
np.save(lookup_table_filename, lookup_table)
if __name__ == "__main__":
# Read the npy file as an alternative to the actual log files
# This has shape (3,361,361). The first axis is the three residues in the
# tripeptide. x[0] is the first residue, x[1] is the second residue, x[2] is the third residue.
# The second and third axis are the backbone angles. Angles of (90,90) correspond to index (270,270),
# so the counting starts at -180.
# x[:,360,360] is equal to x[:,0,0] to make the linear interpolation in procs easier to do.
x = np.load("ALA_cb.npy")
angles = []
cs = []
# Get angles and chemical shift array from the matrix
for ang1 in range(361):
for ang2 in range(361):
angles.append([ang1-180,ang2-180])
cs.append(x[:,ang1, ang2])
cs = np.asarray(cs)
"""
angles and cs should be exactly what you get with your script (in this case only cb cs),
so start from here when you do the interpolation yourself.
For Alanine, angles and cs need to have the following form:
angles = [[-180,-180], [-180, -160], ...]
cs = [[172.3, 175.8, 172.0], [173.7, 178.2, 172.5], ...]
"""
create_lookup_table(angles=angles, cs=cs, lookup_table_filename='ALA_cb_new.npy', granularity_degree=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment