leelasd/MD_using_ANI1cxx.py

## MD_using_ANI1cxx.py
from openmmtools.constants import kB
from simtk import unit
import numpy as np
from tqdm import tqdm
from IPython.core.display import display, HTML
from IPython.display import SVG
from rdkit.Chem.Draw import IPythonConsole
import mdtraj as md
#from utils import *
import nglview
from rdkit import Chem
from rdkit.Chem import AllChem

# temperature, mass, and related constants
temperature = 300 * unit.kelvin
kT = kB * temperature
mass = (12.0 * unit.dalton)
sigma_v = np.sqrt(kB * temperature / mass)

# openmm units
mass_unit = unit.dalton
distance_unit = unit.nanometer
time_unit = unit.femtosecond
energy_unit = unit.kilojoule_per_mole
speed_unit = distance_unit / time_unit
force_unit = unit.kilojoule_per_mole / unit.nanometer

# ANI-1 units and conversion factors
ani_distance_unit = unit.angstrom
hartree_to_kJ_mol = 2625.499638
ani_energy_unit = hartree_to_kJ_mol * unit.kilojoule_per_mole # simtk.unit doesn't have hartree?
nm_to_angstroms = (1.0 * distance_unit) / (1.0 * ani_distance_unit)

import torchani
import torch
device = torch.device('cpu')
model = torchani.models.ANI1ccx()
model = model.to(device)


#####################################
# some helper functions
def write_pdb(mol, name, tautomer_id):
    # write a pdf file using the name and tautomer_id
    Chem.MolToPDBFile(mol, '{}_{}.pdb'.format(name, tautomer_id))
    return Chem.MolToPDBBlock(mol)

def _mol_with_atom_index(mol):
    # adds atom index to mol display
    atoms = mol.GetNumAtoms()
    for idx in range( atoms ):
        mol.GetAtomWithIdx(idx).SetProp( 'molAtomMapNumber', str( mol.GetAtomWithIdx(idx).GetIdx() ) )
    return mol

def generate_rdkit_mol(smiles):
    # geneartes a rdkit mol object with 3D coordinates from smiles
    m = Chem.MolFromSmiles(smiles)
    m = Chem.AddHs(m)
    display(_mol_with_atom_index(m))
    AllChem.EmbedMolecule(m)
    return m

def from_mol_to_ani_input(mol):
    # generates atom_list and coord_list entries from rdkit mol
    atom_list = []
    coord_list = []
    for a in mol.GetAtoms():
        atom_list.append(a.GetSymbol())
        pos = mol.GetConformer().GetAtomPosition(a.GetIdx())
        coord_list.append([pos.x, pos.y, pos.z])
    return { 'atom_list' : ''.join(atom_list), 'coord_list' : coord_list}
#####################################

def langevin(device,
             model,
             species,
             atom_list,
             x0,
             force,
             n_steps=100,
             stepsize=1 * unit.femtosecond,
             collision_rate=10/unit.picoseconds,
             temperature=300 * unit.kelvin,
             platform = 'cpu',
             progress_bar=False
            ):
    """Unadjusted Langevin dynamics.

    Parameters
    ----------
    device,
    model,
    atom_list
    x0 : array of floats, unit'd (distance unit)
        initial configuration
    force : callable, accepts a unit'd array and returns a unit'd array
        assumes input is in units of distance
        output is in units of energy / distance
    n_steps : integer
        number of Langevin steps
    stepsize : float > 0, in units of time
        finite timestep parameter
    collision_rate : float > 0, in units of 1/time
        controls the rate of interaction with the heat bath
    temperature : float > 0, in units of temperature
    progress_bar : bool
        use tqdm to show progress bar

    Returns
    -------
    traj : [n_steps + 1 x dim] array of floats, unit'd
        trajectory of samples generated by Langevin dynamics

    """

    assert(type(x0) == unit.Quantity)
    assert(type(stepsize) == unit.Quantity)
    assert(type(collision_rate) == unit.Quantity)
    assert(type(temperature) == unit.Quantity)
    trange = range(n_steps)
    trange = tqdm(trange)

    # generate mass arrays
    mass_dict_in_daltons = {'H': 1.0, 'C': 12.0, 'N': 14.0, 'O': 16.0}
    masses = np.array([mass_dict_in_daltons[a] for a in atom_list]) * unit.daltons

    # generate sigma_v and v0 with different masses
    sigma_v = np.array([unit.sqrt(kB * temperature / m) / speed_unit for m in masses]) * speed_unit
    v0 = np.random.randn(len(sigma_v),3) * sigma_v[:,None]

    # convert initial state numpy arrays with correct attached units
    x = np.array(x0.value_in_unit(distance_unit)) * distance_unit
    v = np.array(v0.value_in_unit(speed_unit)) * speed_unit

    # traj is accumulated as a list of arrays with attached units
    traj = [x]
    # energy is accumulated in dynamic
    energies = []

    # dimensionless scalars
    a = np.exp(- collision_rate * stepsize)
    b = np.sqrt(1 - np.exp(-2 * collision_rate * stepsize))

    # compute force on initial configuration
    F, energy_in_kJ_mol = force(x, device, model, species, platform)
    energies.append(energy_in_kJ_mol)

    for _ in trange:

        # v
        v += (stepsize * 0.5) * F / masses[:,None]
        # r
        x += (stepsize * 0.5) * v
        # o
        v = (a * v) + (b * sigma_v[:,None] * np.random.randn(*x.shape))
        # r
        x += (stepsize * 0.5) * v
        F, energy_in_kJ_mol = force(x, device, model, species, platform)
        # v
        v += (stepsize * 0.5) * F / masses[:,None]
        norm_F = np.linalg.norm(F)
        # report gradient norm
        trange.set_postfix({'|force|': norm_F})
        # append energy
        energies.append(energy_in_kJ_mol)

        # check positions and forces are finite
        if (not np.isfinite(x).all()) or (not np.isfinite(norm_F)):
            print("Numerical instability encountered!")
            return traj
        traj.append(x)
    return traj, x, energies

def ANI1ccx_force_and_energy(x, device, model, species, platform):
    """
    Parameters
    ----------
    x : numpy array, unit'd
        coordinates

    Returns
    -------
    F : numpy array, unit'd
        force, with units of kJ/mol/nm attached
    """
    assert(type(x) == unit.Quantity)
    x_in_nm = x.value_in_unit(unit.nanometer)

    coordinates = torch.tensor([x_in_nm],
                               requires_grad=True, device=device, dtype=torch.float32)

    # convert from nm to angstroms
    coordinates_in_angstroms = coordinates * nm_to_angstroms
    _, energy_in_hartree = model((species, coordinates_in_angstroms))

    # convert energy from hartrees to kJ/mol
    energy_in_kJ_mol = energy_in_hartree * hartree_to_kJ_mol
    # derivative of E (in kJ/mol) w.r.t. coordinates (in nm)
    derivative = torch.autograd.grad((energy_in_kJ_mol).sum(), coordinates)[0]

    if platform == 'cpu':
        F_in_openmm_unit = - np.array(derivative)[0]
    elif platform == 'cuda':
        F_in_openmm_unit = - np.array(derivative.cpu())[0]
    else:
        raise RuntimeError('Platform needs to be specified. Either CPU or CUDA.')

    return (F_in_openmm_unit * (unit.kilojoule_per_mole / unit.nanometer)), energy_in_kJ_mol.item()

################################3
# simulate tautomers
name = 'molDWRow_37'
t1_smiles = 'CC(C(CCC1)=C1O)=O'
t2_smiles = 'C/C(/O)=C(\CCC1)/C1=O'

#name = 'molDWRow_298'
#t1_smiles = 'CC(CC(C)=O)=O'
#t2_smiles = 'CC(/C=C(/C)\O)=O'

# specify run id
run = 1
# which tautomer form do you want to simulation [1/2]
tautomer_idx = 1

# generate rdkit mol for smiles
mols = { 't1' : generate_rdkit_mol(t1_smiles), 't2' : generate_rdkit_mol(t2_smiles)}
write_pdb(mols['t1'], name, 't1')
write_pdb(mols['t2'], name, 't2')

# continueing with only one mol structure
mol = mols['t{}'.format(tautomer_idx)]

# continueing with only one mol structure
mol = mols['t{}'.format(tautomer_idx)]
ani_input = from_mol_to_ani_input(mol)

# extract hydrogen acceptor idx for to_mol
species = model.species_to_tensor(ani_input['atom_list']).to(device).unsqueeze(0)

# initial conditions: coordinates from example were given in units of angstroms
x0_in_angstroms = np.array(ani_input['coord_list']) * unit.angstrom

# run dynamics and append trajectory
traj_in_nm = []
trajectory, last_frame, energies = langevin(device, model, species, ani_input['atom_list'], x0_in_angstroms, ANI1ccx_force_and_energy, n_steps=5000, stepsize=2*unit.femtosecond, temperature=temperature, platform='cpu', progress_bar=True)
traj_in_nm += [x / unit.nanometer for x in trajectory]

# generating mdtraj traj object and save as dcd
topology = md.load('{}_t{}.pdb'.format(name, tautomer_idx)).topology
ani_traj = md.Trajectory(traj_in_nm, topology)
ani_traj = ani_traj.superpose(ani_traj[0]) # RMSD align onto first frame
ani_traj.save('results_{}_t{}_run{}.dcd'.format(name, tautomer_idx, run))

# save energy in csv
f = open('energy_{}_t{}_run{}.csv'.format(name, tautomer_idx, run), 'w')
for idx, e in enumerate(energies):
    f.write('{}, {}\n'.format(idx, e))
f.close()
	from openmmtools.constants import kB
	from simtk import unit
	import numpy as np
	from tqdm import tqdm
	from IPython.core.display import display, HTML
	from IPython.display import SVG
	from rdkit.Chem.Draw import IPythonConsole
	import mdtraj as md
	#from utils import *
	import nglview
	from rdkit import Chem
	from rdkit.Chem import AllChem

	# temperature, mass, and related constants
	temperature = 300 * unit.kelvin
	kT = kB * temperature
	mass = (12.0 * unit.dalton)
	sigma_v = np.sqrt(kB * temperature / mass)

	# openmm units
	mass_unit = unit.dalton
	distance_unit = unit.nanometer
	time_unit = unit.femtosecond
	energy_unit = unit.kilojoule_per_mole
	speed_unit = distance_unit / time_unit
	force_unit = unit.kilojoule_per_mole / unit.nanometer

	# ANI-1 units and conversion factors
	ani_distance_unit = unit.angstrom
	hartree_to_kJ_mol = 2625.499638
	ani_energy_unit = hartree_to_kJ_mol * unit.kilojoule_per_mole # simtk.unit doesn't have hartree?
	nm_to_angstroms = (1.0 * distance_unit) / (1.0 * ani_distance_unit)

	import torchani
	import torch
	device = torch.device('cpu')
	model = torchani.models.ANI1ccx()
	model = model.to(device)


	#####################################
	# some helper functions
	def write_pdb(mol, name, tautomer_id):
	# write a pdf file using the name and tautomer_id
	Chem.MolToPDBFile(mol, '{}_{}.pdb'.format(name, tautomer_id))
	return Chem.MolToPDBBlock(mol)

	def _mol_with_atom_index(mol):
	# adds atom index to mol display
	atoms = mol.GetNumAtoms()
	for idx in range( atoms ):
	mol.GetAtomWithIdx(idx).SetProp( 'molAtomMapNumber', str( mol.GetAtomWithIdx(idx).GetIdx() ) )
	return mol

	def generate_rdkit_mol(smiles):
	# geneartes a rdkit mol object with 3D coordinates from smiles
	m = Chem.MolFromSmiles(smiles)
	m = Chem.AddHs(m)
	display(_mol_with_atom_index(m))
	AllChem.EmbedMolecule(m)
	return m

	def from_mol_to_ani_input(mol):
	# generates atom_list and coord_list entries from rdkit mol
	atom_list = []
	coord_list = []
	for a in mol.GetAtoms():
	atom_list.append(a.GetSymbol())
	pos = mol.GetConformer().GetAtomPosition(a.GetIdx())
	coord_list.append([pos.x, pos.y, pos.z])
	return { 'atom_list' : ''.join(atom_list), 'coord_list' : coord_list}
	#####################################

	def langevin(device,
	model,
	species,
	atom_list,
	x0,
	force,
	n_steps=100,
	stepsize=1 * unit.femtosecond,
	collision_rate=10/unit.picoseconds,
	temperature=300 * unit.kelvin,
	platform = 'cpu',
	progress_bar=False
	):
	"""Unadjusted Langevin dynamics.

	Parameters
	----------
	device,
	model,
	atom_list
	x0 : array of floats, unit'd (distance unit)
	initial configuration
	force : callable, accepts a unit'd array and returns a unit'd array
	assumes input is in units of distance
	output is in units of energy / distance
	n_steps : integer
	number of Langevin steps
	stepsize : float > 0, in units of time
	finite timestep parameter
	collision_rate : float > 0, in units of 1/time
	controls the rate of interaction with the heat bath
	temperature : float > 0, in units of temperature
	progress_bar : bool
	use tqdm to show progress bar

	Returns
	-------
	traj : [n_steps + 1 x dim] array of floats, unit'd
	trajectory of samples generated by Langevin dynamics

	"""

	assert(type(x0) == unit.Quantity)
	assert(type(stepsize) == unit.Quantity)
	assert(type(collision_rate) == unit.Quantity)
	assert(type(temperature) == unit.Quantity)
	trange = range(n_steps)
	trange = tqdm(trange)

	# generate mass arrays
	mass_dict_in_daltons = {'H': 1.0, 'C': 12.0, 'N': 14.0, 'O': 16.0}
	masses = np.array([mass_dict_in_daltons[a] for a in atom_list]) * unit.daltons

	# generate sigma_v and v0 with different masses
	sigma_v = np.array([unit.sqrt(kB * temperature / m) / speed_unit for m in masses]) * speed_unit
	v0 = np.random.randn(len(sigma_v),3) * sigma_v[:,None]

	# convert initial state numpy arrays with correct attached units
	x = np.array(x0.value_in_unit(distance_unit)) * distance_unit
	v = np.array(v0.value_in_unit(speed_unit)) * speed_unit

	# traj is accumulated as a list of arrays with attached units
	traj = [x]
	# energy is accumulated in dynamic
	energies = []

	# dimensionless scalars
	a = np.exp(- collision_rate * stepsize)
	b = np.sqrt(1 - np.exp(-2 * collision_rate * stepsize))

	# compute force on initial configuration
	F, energy_in_kJ_mol = force(x, device, model, species, platform)
	energies.append(energy_in_kJ_mol)

	for _ in trange:

	# v
	v += (stepsize * 0.5) * F / masses[:,None]
	# r
	x += (stepsize * 0.5) * v
	# o
	v = (a * v) + (b * sigma_v[:,None] * np.random.randn(*x.shape))
	# r
	x += (stepsize * 0.5) * v
	F, energy_in_kJ_mol = force(x, device, model, species, platform)
	# v
	v += (stepsize * 0.5) * F / masses[:,None]
	norm_F = np.linalg.norm(F)
	# report gradient norm
	trange.set_postfix({'\|force\|': norm_F})
	# append energy
	energies.append(energy_in_kJ_mol)

	# check positions and forces are finite
	if (not np.isfinite(x).all()) or (not np.isfinite(norm_F)):
	print("Numerical instability encountered!")
	return traj
	traj.append(x)
	return traj, x, energies

	def ANI1ccx_force_and_energy(x, device, model, species, platform):
	"""
	Parameters
	----------
	x : numpy array, unit'd
	coordinates

	Returns
	-------
	F : numpy array, unit'd
	force, with units of kJ/mol/nm attached
	"""
	assert(type(x) == unit.Quantity)
	x_in_nm = x.value_in_unit(unit.nanometer)

	coordinates = torch.tensor([x_in_nm],
	requires_grad=True, device=device, dtype=torch.float32)

	# convert from nm to angstroms
	coordinates_in_angstroms = coordinates * nm_to_angstroms
	_, energy_in_hartree = model((species, coordinates_in_angstroms))

	# convert energy from hartrees to kJ/mol
	energy_in_kJ_mol = energy_in_hartree * hartree_to_kJ_mol
	# derivative of E (in kJ/mol) w.r.t. coordinates (in nm)
	derivative = torch.autograd.grad((energy_in_kJ_mol).sum(), coordinates)[0]

	if platform == 'cpu':
	F_in_openmm_unit = - np.array(derivative)[0]
	elif platform == 'cuda':
	F_in_openmm_unit = - np.array(derivative.cpu())[0]
	else:
	raise RuntimeError('Platform needs to be specified. Either CPU or CUDA.')

	return (F_in_openmm_unit * (unit.kilojoule_per_mole / unit.nanometer)), energy_in_kJ_mol.item()

	################################3
	# simulate tautomers
	name = 'molDWRow_37'
	t1_smiles = 'CC(C(CCC1)=C1O)=O'
	t2_smiles = 'C/C(/O)=C(\CCC1)/C1=O'

	#name = 'molDWRow_298'
	#t1_smiles = 'CC(CC(C)=O)=O'
	#t2_smiles = 'CC(/C=C(/C)\O)=O'

	# specify run id
	run = 1
	# which tautomer form do you want to simulation [1/2]
	tautomer_idx = 1

	# generate rdkit mol for smiles
	mols = { 't1' : generate_rdkit_mol(t1_smiles), 't2' : generate_rdkit_mol(t2_smiles)}
	write_pdb(mols['t1'], name, 't1')
	write_pdb(mols['t2'], name, 't2')

	# continueing with only one mol structure
	mol = mols['t{}'.format(tautomer_idx)]

	# continueing with only one mol structure
	mol = mols['t{}'.format(tautomer_idx)]
	ani_input = from_mol_to_ani_input(mol)

	# extract hydrogen acceptor idx for to_mol
	species = model.species_to_tensor(ani_input['atom_list']).to(device).unsqueeze(0)

	# initial conditions: coordinates from example were given in units of angstroms
	x0_in_angstroms = np.array(ani_input['coord_list']) * unit.angstrom

	# run dynamics and append trajectory
	traj_in_nm = []
	trajectory, last_frame, energies = langevin(device, model, species, ani_input['atom_list'], x0_in_angstroms, ANI1ccx_force_and_energy, n_steps=5000, stepsize=2*unit.femtosecond, temperature=temperature, platform='cpu', progress_bar=True)
	traj_in_nm += [x / unit.nanometer for x in trajectory]

	# generating mdtraj traj object and save as dcd
	topology = md.load('{}_t{}.pdb'.format(name, tautomer_idx)).topology
	ani_traj = md.Trajectory(traj_in_nm, topology)
	ani_traj = ani_traj.superpose(ani_traj[0]) # RMSD align onto first frame
	ani_traj.save('results_{}_t{}_run{}.dcd'.format(name, tautomer_idx, run))

	# save energy in csv
	f = open('energy_{}_t{}_run{}.csv'.format(name, tautomer_idx, run), 'w')
	for idx, e in enumerate(energies):
	f.write('{}, {}\n'.format(idx, e))
	f.close()