-
-
Save leelasd/04e16c3b1142881182b7ac3766fc307f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from openmmtools.constants import kB | |
from simtk import unit | |
import numpy as np | |
from tqdm import tqdm | |
from IPython.core.display import display, HTML | |
from IPython.display import SVG | |
from rdkit.Chem.Draw import IPythonConsole | |
import mdtraj as md | |
#from utils import * | |
import nglview | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
# temperature, mass, and related constants | |
temperature = 300 * unit.kelvin | |
kT = kB * temperature | |
mass = (12.0 * unit.dalton) | |
sigma_v = np.sqrt(kB * temperature / mass) | |
# openmm units | |
mass_unit = unit.dalton | |
distance_unit = unit.nanometer | |
time_unit = unit.femtosecond | |
energy_unit = unit.kilojoule_per_mole | |
speed_unit = distance_unit / time_unit | |
force_unit = unit.kilojoule_per_mole / unit.nanometer | |
# ANI-1 units and conversion factors | |
ani_distance_unit = unit.angstrom | |
hartree_to_kJ_mol = 2625.499638 | |
ani_energy_unit = hartree_to_kJ_mol * unit.kilojoule_per_mole # simtk.unit doesn't have hartree? | |
nm_to_angstroms = (1.0 * distance_unit) / (1.0 * ani_distance_unit) | |
import torchani | |
import torch | |
device = torch.device('cpu') | |
model = torchani.models.ANI1ccx() | |
model = model.to(device) | |
##################################### | |
# some helper functions | |
def write_pdb(mol, name, tautomer_id): | |
# write a pdf file using the name and tautomer_id | |
Chem.MolToPDBFile(mol, '{}_{}.pdb'.format(name, tautomer_id)) | |
return Chem.MolToPDBBlock(mol) | |
def _mol_with_atom_index(mol): | |
# adds atom index to mol display | |
atoms = mol.GetNumAtoms() | |
for idx in range( atoms ): | |
mol.GetAtomWithIdx(idx).SetProp( 'molAtomMapNumber', str( mol.GetAtomWithIdx(idx).GetIdx() ) ) | |
return mol | |
def generate_rdkit_mol(smiles): | |
# geneartes a rdkit mol object with 3D coordinates from smiles | |
m = Chem.MolFromSmiles(smiles) | |
m = Chem.AddHs(m) | |
display(_mol_with_atom_index(m)) | |
AllChem.EmbedMolecule(m) | |
return m | |
def from_mol_to_ani_input(mol): | |
# generates atom_list and coord_list entries from rdkit mol | |
atom_list = [] | |
coord_list = [] | |
for a in mol.GetAtoms(): | |
atom_list.append(a.GetSymbol()) | |
pos = mol.GetConformer().GetAtomPosition(a.GetIdx()) | |
coord_list.append([pos.x, pos.y, pos.z]) | |
return { 'atom_list' : ''.join(atom_list), 'coord_list' : coord_list} | |
##################################### | |
def langevin(device, | |
model, | |
species, | |
atom_list, | |
x0, | |
force, | |
n_steps=100, | |
stepsize=1 * unit.femtosecond, | |
collision_rate=10/unit.picoseconds, | |
temperature=300 * unit.kelvin, | |
platform = 'cpu', | |
progress_bar=False | |
): | |
"""Unadjusted Langevin dynamics. | |
Parameters | |
---------- | |
device, | |
model, | |
atom_list | |
x0 : array of floats, unit'd (distance unit) | |
initial configuration | |
force : callable, accepts a unit'd array and returns a unit'd array | |
assumes input is in units of distance | |
output is in units of energy / distance | |
n_steps : integer | |
number of Langevin steps | |
stepsize : float > 0, in units of time | |
finite timestep parameter | |
collision_rate : float > 0, in units of 1/time | |
controls the rate of interaction with the heat bath | |
temperature : float > 0, in units of temperature | |
progress_bar : bool | |
use tqdm to show progress bar | |
Returns | |
------- | |
traj : [n_steps + 1 x dim] array of floats, unit'd | |
trajectory of samples generated by Langevin dynamics | |
""" | |
assert(type(x0) == unit.Quantity) | |
assert(type(stepsize) == unit.Quantity) | |
assert(type(collision_rate) == unit.Quantity) | |
assert(type(temperature) == unit.Quantity) | |
trange = range(n_steps) | |
trange = tqdm(trange) | |
# generate mass arrays | |
mass_dict_in_daltons = {'H': 1.0, 'C': 12.0, 'N': 14.0, 'O': 16.0} | |
masses = np.array([mass_dict_in_daltons[a] for a in atom_list]) * unit.daltons | |
# generate sigma_v and v0 with different masses | |
sigma_v = np.array([unit.sqrt(kB * temperature / m) / speed_unit for m in masses]) * speed_unit | |
v0 = np.random.randn(len(sigma_v),3) * sigma_v[:,None] | |
# convert initial state numpy arrays with correct attached units | |
x = np.array(x0.value_in_unit(distance_unit)) * distance_unit | |
v = np.array(v0.value_in_unit(speed_unit)) * speed_unit | |
# traj is accumulated as a list of arrays with attached units | |
traj = [x] | |
# energy is accumulated in dynamic | |
energies = [] | |
# dimensionless scalars | |
a = np.exp(- collision_rate * stepsize) | |
b = np.sqrt(1 - np.exp(-2 * collision_rate * stepsize)) | |
# compute force on initial configuration | |
F, energy_in_kJ_mol = force(x, device, model, species, platform) | |
energies.append(energy_in_kJ_mol) | |
for _ in trange: | |
# v | |
v += (stepsize * 0.5) * F / masses[:,None] | |
# r | |
x += (stepsize * 0.5) * v | |
# o | |
v = (a * v) + (b * sigma_v[:,None] * np.random.randn(*x.shape)) | |
# r | |
x += (stepsize * 0.5) * v | |
F, energy_in_kJ_mol = force(x, device, model, species, platform) | |
# v | |
v += (stepsize * 0.5) * F / masses[:,None] | |
norm_F = np.linalg.norm(F) | |
# report gradient norm | |
trange.set_postfix({'|force|': norm_F}) | |
# append energy | |
energies.append(energy_in_kJ_mol) | |
# check positions and forces are finite | |
if (not np.isfinite(x).all()) or (not np.isfinite(norm_F)): | |
print("Numerical instability encountered!") | |
return traj | |
traj.append(x) | |
return traj, x, energies | |
def ANI1ccx_force_and_energy(x, device, model, species, platform): | |
""" | |
Parameters | |
---------- | |
x : numpy array, unit'd | |
coordinates | |
Returns | |
------- | |
F : numpy array, unit'd | |
force, with units of kJ/mol/nm attached | |
""" | |
assert(type(x) == unit.Quantity) | |
x_in_nm = x.value_in_unit(unit.nanometer) | |
coordinates = torch.tensor([x_in_nm], | |
requires_grad=True, device=device, dtype=torch.float32) | |
# convert from nm to angstroms | |
coordinates_in_angstroms = coordinates * nm_to_angstroms | |
_, energy_in_hartree = model((species, coordinates_in_angstroms)) | |
# convert energy from hartrees to kJ/mol | |
energy_in_kJ_mol = energy_in_hartree * hartree_to_kJ_mol | |
# derivative of E (in kJ/mol) w.r.t. coordinates (in nm) | |
derivative = torch.autograd.grad((energy_in_kJ_mol).sum(), coordinates)[0] | |
if platform == 'cpu': | |
F_in_openmm_unit = - np.array(derivative)[0] | |
elif platform == 'cuda': | |
F_in_openmm_unit = - np.array(derivative.cpu())[0] | |
else: | |
raise RuntimeError('Platform needs to be specified. Either CPU or CUDA.') | |
return (F_in_openmm_unit * (unit.kilojoule_per_mole / unit.nanometer)), energy_in_kJ_mol.item() | |
################################3 | |
# simulate tautomers | |
name = 'molDWRow_37' | |
t1_smiles = 'CC(C(CCC1)=C1O)=O' | |
t2_smiles = 'C/C(/O)=C(\CCC1)/C1=O' | |
#name = 'molDWRow_298' | |
#t1_smiles = 'CC(CC(C)=O)=O' | |
#t2_smiles = 'CC(/C=C(/C)\O)=O' | |
# specify run id | |
run = 1 | |
# which tautomer form do you want to simulation [1/2] | |
tautomer_idx = 1 | |
# generate rdkit mol for smiles | |
mols = { 't1' : generate_rdkit_mol(t1_smiles), 't2' : generate_rdkit_mol(t2_smiles)} | |
write_pdb(mols['t1'], name, 't1') | |
write_pdb(mols['t2'], name, 't2') | |
# continueing with only one mol structure | |
mol = mols['t{}'.format(tautomer_idx)] | |
# continueing with only one mol structure | |
mol = mols['t{}'.format(tautomer_idx)] | |
ani_input = from_mol_to_ani_input(mol) | |
# extract hydrogen acceptor idx for to_mol | |
species = model.species_to_tensor(ani_input['atom_list']).to(device).unsqueeze(0) | |
# initial conditions: coordinates from example were given in units of angstroms | |
x0_in_angstroms = np.array(ani_input['coord_list']) * unit.angstrom | |
# run dynamics and append trajectory | |
traj_in_nm = [] | |
trajectory, last_frame, energies = langevin(device, model, species, ani_input['atom_list'], x0_in_angstroms, ANI1ccx_force_and_energy, n_steps=5000, stepsize=2*unit.femtosecond, temperature=temperature, platform='cpu', progress_bar=True) | |
traj_in_nm += [x / unit.nanometer for x in trajectory] | |
# generating mdtraj traj object and save as dcd | |
topology = md.load('{}_t{}.pdb'.format(name, tautomer_idx)).topology | |
ani_traj = md.Trajectory(traj_in_nm, topology) | |
ani_traj = ani_traj.superpose(ani_traj[0]) # RMSD align onto first frame | |
ani_traj.save('results_{}_t{}_run{}.dcd'.format(name, tautomer_idx, run)) | |
# save energy in csv | |
f = open('energy_{}_t{}_run{}.csv'.format(name, tautomer_idx, run), 'w') | |
for idx, e in enumerate(energies): | |
f.write('{}, {}\n'.format(idx, e)) | |
f.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment