Skip to content

Instantly share code, notes, and snippets.

@muammar muammar/data.py forked from ghutchis/data.py
Created Oct 2, 2019

Embed
What would you like to do?
Normal Mode Conformer Generation using cclib and OpenBabel Python
element_symbols = [
"Xx", "H", "He", "Li", "Be", "B", "C", "N", "O", "F",
"Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K",
"Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu",
"Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In",
"Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr",
"Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm",
"Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au",
"Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac",
"Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es",
"Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
"Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og" ]
element_masses = [
# from IUPAC http://www.chem.qmul.ac.uk/iupac/AtWt/
# (2015 set, updated from 2013)
0, 1.00784, 4.0026,
6.938, 9.01218, 10.806, 12.011, 14.006, 15.9994, 18.9984, 20.1797,
22.9898, 24.305, 26.9815,
28.0855, 30.9738, 32.065, 35.453, 39.948, 39.0983, 40.078,
44.9559, 47.867, 50.9415, 51.9961, 54.938, 55.845, 58.9332,
58.6934, 63.546, 65.38, 69.723, 72.64, 74.9216, 78.971,
79.904, 83.798, 85.4678, 87.62, 88.9058, 91.224, 92.9064,
95.95, 97, 101.07, 102.9055, 106.42, 107.8682, 112.414,
114.818, 118.71, 121.76, 127.6, 126.9045, 131.293, 132.9055,
137.327, 138.9055, 140.116, 140.9077, 144.242, 145, 150.36,
151.964, 157.25, 158.9253, 162.5, 164.9303, 167.259, 168.9342,
173.045, 174.9668, 178.49, 180.9479, 183.84, 186.207, 190.23,
192.217, 195.084, 196.9666, 200.592, 204.38, 207.2, 208.9804,
209, 210, 222, 223, 226, 227, 232.0377,
231.0358, 238.0289, 237, 244, 243, 247, 247,
251, 252, 257, 258, 259, 262, 267,
270, 269, 270, 270, 278, 281, 281,
285, 286, 289, 289, 293, 293, 294 ]
element_radii = [ # covalent radii
# From Pyykko doi: 10.1002/chem.200800987
# Dummy, 1st row
0.18, 0.32, 0.46,
# 2nd row
1.33, 1.02, 0.85, 0.75, 0.71, 0.63, 0.64, 0.67,
# 3rd row
1.55, 1.39, 1.26, 1.16, 1.11, 1.03, 0.99, 0.96,
# 4th row K, Ca
1.96, 1.71,
# 1st row TM (Sc.. Zn)
1.48, 1.36, 1.34, 1.22, 1.19, 1.16, 1.11, 1.10, 1.12, 1.18,
# 4th row p-block (Ga..Kr)
1.24, 1.21, 1.21, 1.16, 1.14, 1.17,
# 5th row Rb, Sr
2.10, 1.85,
# 2nd row TM (Y..Cd)
1.63, 1.54, 1.47, 1.38, 1.28, 1.25, 1.25, 1.20, 1.28, 1.36,
# 5th row p-block (In..Xe)
1.42, 1.40, 1.40, 1.36, 1.33, 1.31,
# 6th row Cs, Ba
2.32, 1.96,
# Lanthanides La..Gd
1.80, 1.63, 1.76, 1.74, 1.73, 1.72, 1.68,
# Lanthanides Tb..Yb
1.69, 1.68, 1.67, 1.66, 1.65, 1.64, 1.70,
# 3rd row TM (Lu..Hg)
1.62, 1.52, 1.46, 1.37, 1.31, 1.29, 1.22, 1.23, 1.24, 1.33,
# 6th row p-block (Tl..Rn)
1.44, 1.44, 1.51, 1.45, 1.47, 1.42,
# 7th row Fr, Ra
2.23, 2.01,
# Actinides (Ac.. Am)
1.86, 1.75, 1.69, 1.70, 1.71, 1.72, 1.66,
# Actinides (Cm..No)
1.66, 1.68, 1.68, 1.65, 1.67, 1.73, 1.76,
# Trans-actinides
1.61, 1.57, 1.49, 1.43, 1.41, 1.34, 1.29, 1.28, 1.21, 1.22,
1.36, 1.43, 1.62, 1.75, 1.65, 1.57
]
#!/usr/bin/env python
from __future__ import print_function
import sys, os
import logging
import numpy as np
from cclib.io import ccopen
from nms import nmsgenerator
from data import element_masses, element_symbols
def nms_ts(n):
"""Normal Mode Temperature and Samples
Args:
n (int): The number of non-hydrogen atoms.
Returns:
temperature (float): thermal energy to populate normal normal_modes
samples (int): number of samples to consider (ignored)
"""
stable = {1: (2000.0, 500),
2: (1500.0, 450),
3: (1000.0, 425),
4: ( 600.0, 400),
5: ( 600.0, 200),
6: ( 600.0, 30),
7: ( 600.0, 20),
8: ( 450.0, 5),
9: ( 450.0, 3)}
if n in stable: return stable[n]
else: return (400, 2)
# read through a bunch of files on the command-line
for arg in sys.argv[1:]:
# read the file with cclib
file = ccopen(arg)
file.logger.setLevel(logging.ERROR)
molecule = file.parse()
# the atom labels (here as atomic numbers)
labels = molecule.atomnos
try:
# not all cclib parsers will populate atommasses
# but this is more accurate, since it would include isotopes
mass = np.asarray(molecule.atommasses).repeat(3)
except AttributeError:
mass = np.asarray([element_masses[i] for i in labels]).repeat(3)
# get the last geometry from a geometry optimization
# should end up as an ndarray (N, 3)
xyz = np.asarray(molecule.atomcoords[-1]).reshape(-1,3)
# vibrational frequencies (remove any zeros, e.g., translations, rotations)
freq = np.asarray(molecule.vibfreqs)
freq = freq[freq !=0.0] # should now be an array of size (3N-6,)
# get flattened normal modes and reshape to (3N, 3N)
# ensure they're in (CartesianCoordinate, ModeNumber)
nmo = np.asarray(molecule.vibdisps).reshape(-1, xyz.shape[0]*3) # [3N, 3N]
# orca filters out translations and rotations
# they appear as modes with all-zero components
# so please remove them
nmo = nmo[np.array([not (i == 0.0).all() for i in nmo])]
# compute mass weighted force constants for each of 3N-6 normal modes
# shapes: [3N-6] * ([3N-6, 3N] * [3N,]).sum(axis=1) = [3N-6,]
fcc = freq * freq * (nmo * nmo * mass).sum(axis=1) / 16.9744 / 10e4
# get the temperature and number of samples for sampling
t, s = nms_ts(np.count_nonzero(labels != 1))
g = nmsgenerator(xyz, nmo, fcc, labels, t, 0)
# generate 100 displaced geometries into new XYZ files
filename, extension = os.path.splitext(arg)
for i in range(100):
energy, rxyz = g.get_random_structure(100)
# print an xyz file
xyzstr = '{}\n{}\n'.format(len(rxyz), energy)
for l, x in zip(labels, rxyz):
xyzstr += '{} {} {} {}\n'.format(element_symbols[l], *x)
new_name = '{}.xyz'.format(i)
with open(new_name, 'w') as f:
f.write(xyzstr)
import numpy as np
import pybel
import openbabel as ob
from data import element_radii
mDynetoMet = 1.0e-5 * 1.0e-3 * 1.0e10
# boltzmann constant (in )
Kb = 1.38064852e-23
# meters to Angstrom
MtoA = 1.0e10
class nmsgenerator():
# xyz = initial min structure
# nmo = normal mode displacements
# fcc = force constants
# spc = atomic species list
# T = temperature of displacement
try:
et = ob.OBElementTable
except:
et = {} # OpenBabel 3 doesn't use this class anymore
def __init__(self,xyz,nmo,fcc,spc,T,minfc = 1.0E-3):
self.xyz = xyz
self.nmo = nmo
self.labels = spc
self.fcc = np.array([i if i > minfc else minfc for i in fcc])
self.chg = np.array([self.__getCharge__(i) for i in spc])
self.radii = [element_radii[i] for i in self.chg]
self.T = T
self.Na = xyz.shape[0]
self.Nf = nmo.shape[0]
self.e0 = self.__get_energy__(xyz)
# returns atomic charge (number)
def __getCharge__(self,t):
if isinstance(t, (int, np.int32)):
return t
return self.et.GetAtomicNum(t)
# Checks for small atomic distances
def __check_atomic_distances__(self,rxyz):
for i in range(0,self.Na):
for j in range(i+1,self.Na):
Rij = np.linalg.norm(rxyz[i]-rxyz[j])
if Rij < 0.6 * (self.radii[i] * self.radii[j]):
return True
return False
# get energy
def __get_energy__(self, rxyz):
xyzstr = '{}\n\n'.format(len(rxyz))
for l, x in zip(self.labels, rxyz):
xyzstr += '{} {} {} {}\n'.format(l, *x)
mol = pybel.readstring('xyz', xyzstr)
ff = pybel._forcefields['uff']
if not ff.Setup(mol.OBMol):
return None
return ff.Energy()
# Generate a structure
def __genrandomstruct__(self):
rdt = np.random.random(self.Nf+1)
rdt[0] = 0.0
norm = np.random.random(1)[0]
rdt = norm*np.sort(rdt)
rdt[self.Nf] = norm
oxyz = self.xyz.copy()
for i in range(self.Nf):
# convert force constants from mDyne to atomic
Ki = mDynetoMet * self.fcc[i]
ci = rdt[i+1]-rdt[i]
Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0
Ri = Sn * MtoA * np.sqrt((3.0 * ci * float(self.Na) * Kb * self.T)/Ki)
oxyz = oxyz + (Ri * self.nmo[i]).reshape(-1, 3)
return oxyz
# Call this to return a random structure
def get_random_structure(self, emax=100):
gs = True
it = 0
while gs:
rxyz = self.__genrandomstruct__()
#gs = self.__check_atomic_distances__(rxyz)
e1 = self.__get_energy__(rxyz)
if not e1:
continue
gs = e1 - self.e0 - emax > 0
it += 1
#if it % 100 == 0:
# print "Failed after ", it
return e1, rxyz
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.