muammar/data.py

## data.py

element_symbols = [
  "Xx", "H", "He", "Li", "Be", "B", "C", "N", "O", "F",
  "Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K",
  "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu",
  "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In",
  "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr",
  "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm",
  "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au",
  "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac",
  "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es",
  "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
  "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og" ]

element_masses = [
  # from IUPAC http://www.chem.qmul.ac.uk/iupac/AtWt/
  # (2015 set, updated from 2013)
  0, 1.00784, 4.0026,
  6.938, 9.01218, 10.806, 12.011, 14.006, 15.9994, 18.9984, 20.1797,
  22.9898, 24.305, 26.9815,
  28.0855, 30.9738, 32.065, 35.453, 39.948, 39.0983, 40.078,
  44.9559, 47.867, 50.9415, 51.9961, 54.938, 55.845, 58.9332,
  58.6934, 63.546, 65.38, 69.723, 72.64, 74.9216, 78.971,
  79.904, 83.798, 85.4678, 87.62, 88.9058, 91.224, 92.9064,
  95.95, 97, 101.07, 102.9055, 106.42, 107.8682, 112.414,
  114.818, 118.71, 121.76, 127.6, 126.9045, 131.293, 132.9055,
  137.327, 138.9055, 140.116, 140.9077, 144.242, 145, 150.36,
  151.964, 157.25, 158.9253, 162.5, 164.9303, 167.259, 168.9342,
  173.045, 174.9668, 178.49, 180.9479, 183.84, 186.207, 190.23,
  192.217, 195.084, 196.9666, 200.592, 204.38, 207.2, 208.9804,
  209, 210, 222, 223, 226, 227, 232.0377,
  231.0358, 238.0289, 237, 244, 243, 247, 247,
  251, 252, 257, 258, 259, 262, 267,
  270, 269, 270, 270, 278, 281, 281,
  285, 286, 289, 289, 293, 293, 294 ]

element_radii = [ # covalent radii
  # From Pyykko doi: 10.1002/chem.200800987
  # Dummy, 1st row
  0.18, 0.32, 0.46,
  # 2nd row
  1.33, 1.02, 0.85, 0.75, 0.71, 0.63, 0.64, 0.67,
  # 3rd row
  1.55, 1.39, 1.26, 1.16, 1.11, 1.03, 0.99, 0.96,
  # 4th row K, Ca
  1.96,  1.71,
  # 1st row TM (Sc.. Zn)
  1.48, 1.36, 1.34, 1.22, 1.19, 1.16, 1.11, 1.10, 1.12, 1.18,
  # 4th row p-block (Ga..Kr)
  1.24, 1.21, 1.21, 1.16, 1.14, 1.17,
  # 5th row Rb, Sr
  2.10, 1.85,
  # 2nd row TM (Y..Cd)
  1.63, 1.54, 1.47, 1.38, 1.28, 1.25, 1.25, 1.20, 1.28, 1.36,
  # 5th row p-block (In..Xe)
  1.42, 1.40, 1.40, 1.36, 1.33, 1.31,
  # 6th row Cs, Ba
  2.32, 1.96,
  # Lanthanides La..Gd
  1.80, 1.63, 1.76, 1.74, 1.73, 1.72, 1.68,
  # Lanthanides Tb..Yb
  1.69, 1.68, 1.67, 1.66, 1.65, 1.64, 1.70,
  # 3rd row TM (Lu..Hg)
  1.62, 1.52, 1.46, 1.37, 1.31, 1.29, 1.22, 1.23, 1.24, 1.33,
  # 6th row p-block (Tl..Rn)
  1.44, 1.44, 1.51, 1.45, 1.47, 1.42,
  # 7th row Fr, Ra
  2.23, 2.01,
  # Actinides (Ac.. Am)
  1.86, 1.75, 1.69, 1.70, 1.71, 1.72, 1.66,
  # Actinides (Cm..No)
  1.66, 1.68, 1.68, 1.65, 1.67, 1.73, 1.76,
  # Trans-actinides
  1.61, 1.57, 1.49, 1.43, 1.41, 1.34, 1.29, 1.28, 1.21, 1.22,
  1.36, 1.43, 1.62, 1.75, 1.65, 1.57
]

## gen-conformers.py
#!/usr/bin/env python

from __future__ import print_function

import sys, os
import logging
import numpy as np
from cclib.io import ccopen
from nms import nmsgenerator
from data import element_masses, element_symbols


def nms_ts(n):
    """Normal Mode Temperature and Samples

    Args:
        n (int): The number of non-hydrogen atoms.

    Returns:
        temperature (float): thermal energy to populate normal normal_modes
        samples     (int):   number of samples to consider (ignored)
    """
    stable =  {1: (2000.0, 500),
               2: (1500.0, 450),
               3: (1000.0, 425),
               4: ( 600.0, 400),
               5: ( 600.0, 200),
               6: ( 600.0,  30),
               7: ( 600.0,  20),
               8: ( 450.0,   5),
               9: ( 450.0,   3)}
    if n in stable: return stable[n]
    else: return (400, 2)

# read through a bunch of files on the command-line
for arg in sys.argv[1:]:
    # read the file with cclib
    file = ccopen(arg)
    file.logger.setLevel(logging.ERROR)
    molecule = file.parse()

    # the atom labels (here as atomic numbers)
    labels = molecule.atomnos
    try:
        # not all cclib parsers will populate atommasses
        # but this is more accurate, since it would include isotopes
        mass = np.asarray(molecule.atommasses).repeat(3)
    except AttributeError:
        mass = np.asarray([element_masses[i] for i in labels]).repeat(3)

    # get the last geometry from a geometry optimization
    # should end up as an ndarray (N, 3)
    xyz = np.asarray(molecule.atomcoords[-1]).reshape(-1,3)

    # vibrational frequencies (remove any zeros, e.g., translations, rotations)
    freq = np.asarray(molecule.vibfreqs)
    freq = freq[freq !=0.0] # should now be an array of size (3N-6,)

    # get flattened normal modes and reshape to (3N, 3N)
    # ensure they're in (CartesianCoordinate, ModeNumber)
    nmo = np.asarray(molecule.vibdisps).reshape(-1, xyz.shape[0]*3) # [3N, 3N]
    # orca filters out translations and rotations
    # they appear as modes with all-zero components
    # so please remove them
    nmo = nmo[np.array([not (i == 0.0).all() for i in nmo])]

    # compute mass weighted force constants for each of 3N-6 normal modes
    # shapes: [3N-6] * ([3N-6, 3N] * [3N,]).sum(axis=1) = [3N-6,]
    fcc = freq * freq * (nmo * nmo * mass).sum(axis=1) / 16.9744 / 10e4

    # get the temperature and number of samples for sampling
    t, s = nms_ts(np.count_nonzero(labels != 1))
    g = nmsgenerator(xyz, nmo, fcc, labels, t, 0)
    # generate 100 displaced geometries into new XYZ files
    filename, extension = os.path.splitext(arg)
    for i in range(100):
        energy, rxyz = g.get_random_structure(100)
        # print an xyz file
        xyzstr = '{}\n{}\n'.format(len(rxyz), energy)
        for l, x in zip(labels, rxyz):
            xyzstr += '{} {} {} {}\n'.format(element_symbols[l], *x)

        new_name = '{}.xyz'.format(i)
        with open(new_name, 'w') as f:
            f.write(xyzstr)

## nms.py
import numpy as np
import pybel
import openbabel as ob
from data import element_radii

mDynetoMet = 1.0e-5 * 1.0e-3 * 1.0e10
# boltzmann constant (in )
Kb = 1.38064852e-23
# meters to Angstrom
MtoA = 1.0e10

class nmsgenerator():
    # xyz = initial min structure
    # nmo = normal mode displacements
    # fcc = force constants
    # spc = atomic species list
    # T   = temperature of displacement

    try:
        et = ob.OBElementTable
    except:
        et = {}  # OpenBabel 3 doesn't use this class anymore

    def __init__(self,xyz,nmo,fcc,spc,T,minfc = 1.0E-3):
        self.xyz = xyz
        self.nmo = nmo
        self.labels = spc
        self.fcc = np.array([i if i > minfc else minfc for i in fcc])
        self.chg = np.array([self.__getCharge__(i) for i in spc])
        self.radii = [element_radii[i] for i in self.chg]
        self.T = T
        self.Na = xyz.shape[0]
        self.Nf = nmo.shape[0]
        self.e0 = self.__get_energy__(xyz)

    # returns atomic charge (number)
    def __getCharge__(self,t):
        if isinstance(t, (int, np.int32)):
            return t
        return self.et.GetAtomicNum(t)

    # Checks for small atomic distances
    def __check_atomic_distances__(self,rxyz):
        for i in range(0,self.Na):
            for j in range(i+1,self.Na):
                Rij = np.linalg.norm(rxyz[i]-rxyz[j])
                if Rij < 0.6 * (self.radii[i] * self.radii[j]):
                    return True
        return False

    # get energy
    def __get_energy__(self, rxyz):
        xyzstr = '{}\n\n'.format(len(rxyz))
        for l, x in zip(self.labels, rxyz):
            xyzstr += '{} {} {} {}\n'.format(l, *x)
        mol = pybel.readstring('xyz', xyzstr)
        ff = pybel._forcefields['uff']
        if not ff.Setup(mol.OBMol):
            return None
        return ff.Energy()

    # Generate a structure
    def __genrandomstruct__(self):
        rdt = np.random.random(self.Nf+1)
        rdt[0] = 0.0
        norm = np.random.random(1)[0]
        rdt = norm*np.sort(rdt)
        rdt[self.Nf] = norm

        oxyz = self.xyz.copy()

        for i in range(self.Nf):
            # convert force constants from mDyne to atomic
            Ki = mDynetoMet * self.fcc[i]
            ci = rdt[i+1]-rdt[i]
            Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0
            Ri = Sn * MtoA * np.sqrt((3.0 * ci * float(self.Na) * Kb * self.T)/Ki)
            oxyz = oxyz + (Ri * self.nmo[i]).reshape(-1, 3)
        return oxyz

    # Call this to return a random structure
    def get_random_structure(self, emax=100):
        gs = True
        it = 0
        while gs:
            rxyz = self.__genrandomstruct__()
            #gs = self.__check_atomic_distances__(rxyz)
            e1 = self.__get_energy__(rxyz)
            if not e1:
                continue
            gs = e1 - self.e0 - emax > 0
            it += 1
            #if it % 100 == 0:
            #    print "Failed after ", it
        return e1, rxyz

	element_symbols = [
	"Xx", "H", "He", "Li", "Be", "B", "C", "N", "O", "F",
	"Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K",
	"Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu",
	"Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
	"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In",
	"Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr",
	"Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm",
	"Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au",
	"Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac",
	"Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es",
	"Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
	"Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og" ]

	element_masses = [
	# from IUPAC http://www.chem.qmul.ac.uk/iupac/AtWt/
	# (2015 set, updated from 2013)
	0, 1.00784, 4.0026,
	6.938, 9.01218, 10.806, 12.011, 14.006, 15.9994, 18.9984, 20.1797,
	22.9898, 24.305, 26.9815,
	28.0855, 30.9738, 32.065, 35.453, 39.948, 39.0983, 40.078,
	44.9559, 47.867, 50.9415, 51.9961, 54.938, 55.845, 58.9332,
	58.6934, 63.546, 65.38, 69.723, 72.64, 74.9216, 78.971,
	79.904, 83.798, 85.4678, 87.62, 88.9058, 91.224, 92.9064,
	95.95, 97, 101.07, 102.9055, 106.42, 107.8682, 112.414,
	114.818, 118.71, 121.76, 127.6, 126.9045, 131.293, 132.9055,
	137.327, 138.9055, 140.116, 140.9077, 144.242, 145, 150.36,
	151.964, 157.25, 158.9253, 162.5, 164.9303, 167.259, 168.9342,
	173.045, 174.9668, 178.49, 180.9479, 183.84, 186.207, 190.23,
	192.217, 195.084, 196.9666, 200.592, 204.38, 207.2, 208.9804,
	209, 210, 222, 223, 226, 227, 232.0377,
	231.0358, 238.0289, 237, 244, 243, 247, 247,
	251, 252, 257, 258, 259, 262, 267,
	270, 269, 270, 270, 278, 281, 281,
	285, 286, 289, 289, 293, 293, 294 ]

	element_radii = [ # covalent radii
	# From Pyykko doi: 10.1002/chem.200800987
	# Dummy, 1st row
	0.18, 0.32, 0.46,
	# 2nd row
	1.33, 1.02, 0.85, 0.75, 0.71, 0.63, 0.64, 0.67,
	# 3rd row
	1.55, 1.39, 1.26, 1.16, 1.11, 1.03, 0.99, 0.96,
	# 4th row K, Ca
	1.96, 1.71,
	# 1st row TM (Sc.. Zn)
	1.48, 1.36, 1.34, 1.22, 1.19, 1.16, 1.11, 1.10, 1.12, 1.18,
	# 4th row p-block (Ga..Kr)
	1.24, 1.21, 1.21, 1.16, 1.14, 1.17,
	# 5th row Rb, Sr
	2.10, 1.85,
	# 2nd row TM (Y..Cd)
	1.63, 1.54, 1.47, 1.38, 1.28, 1.25, 1.25, 1.20, 1.28, 1.36,
	# 5th row p-block (In..Xe)
	1.42, 1.40, 1.40, 1.36, 1.33, 1.31,
	# 6th row Cs, Ba
	2.32, 1.96,
	# Lanthanides La..Gd
	1.80, 1.63, 1.76, 1.74, 1.73, 1.72, 1.68,
	# Lanthanides Tb..Yb
	1.69, 1.68, 1.67, 1.66, 1.65, 1.64, 1.70,
	# 3rd row TM (Lu..Hg)
	1.62, 1.52, 1.46, 1.37, 1.31, 1.29, 1.22, 1.23, 1.24, 1.33,
	# 6th row p-block (Tl..Rn)
	1.44, 1.44, 1.51, 1.45, 1.47, 1.42,
	# 7th row Fr, Ra
	2.23, 2.01,
	# Actinides (Ac.. Am)
	1.86, 1.75, 1.69, 1.70, 1.71, 1.72, 1.66,
	# Actinides (Cm..No)
	1.66, 1.68, 1.68, 1.65, 1.67, 1.73, 1.76,
	# Trans-actinides
	1.61, 1.57, 1.49, 1.43, 1.41, 1.34, 1.29, 1.28, 1.21, 1.22,
	1.36, 1.43, 1.62, 1.75, 1.65, 1.57
	]
	#!/usr/bin/env python

	from __future__ import print_function

	import sys, os
	import logging
	import numpy as np
	from cclib.io import ccopen
	from nms import nmsgenerator
	from data import element_masses, element_symbols


	def nms_ts(n):
	"""Normal Mode Temperature and Samples

	Args:
	n (int): The number of non-hydrogen atoms.

	Returns:
	temperature (float): thermal energy to populate normal normal_modes
	samples (int): number of samples to consider (ignored)
	"""
	stable = {1: (2000.0, 500),
	2: (1500.0, 450),
	3: (1000.0, 425),
	4: ( 600.0, 400),
	5: ( 600.0, 200),
	6: ( 600.0, 30),
	7: ( 600.0, 20),
	8: ( 450.0, 5),
	9: ( 450.0, 3)}
	if n in stable: return stable[n]
	else: return (400, 2)

	# read through a bunch of files on the command-line
	for arg in sys.argv[1:]:
	# read the file with cclib
	file = ccopen(arg)
	file.logger.setLevel(logging.ERROR)
	molecule = file.parse()

	# the atom labels (here as atomic numbers)
	labels = molecule.atomnos
	try:
	# not all cclib parsers will populate atommasses
	# but this is more accurate, since it would include isotopes
	mass = np.asarray(molecule.atommasses).repeat(3)
	except AttributeError:
	mass = np.asarray([element_masses[i] for i in labels]).repeat(3)

	# get the last geometry from a geometry optimization
	# should end up as an ndarray (N, 3)
	xyz = np.asarray(molecule.atomcoords[-1]).reshape(-1,3)

	# vibrational frequencies (remove any zeros, e.g., translations, rotations)
	freq = np.asarray(molecule.vibfreqs)
	freq = freq[freq !=0.0] # should now be an array of size (3N-6,)

	# get flattened normal modes and reshape to (3N, 3N)
	# ensure they're in (CartesianCoordinate, ModeNumber)
	nmo = np.asarray(molecule.vibdisps).reshape(-1, xyz.shape[0]*3) # [3N, 3N]
	# orca filters out translations and rotations
	# they appear as modes with all-zero components
	# so please remove them
	nmo = nmo[np.array([not (i == 0.0).all() for i in nmo])]

	# compute mass weighted force constants for each of 3N-6 normal modes
	# shapes: [3N-6] * ([3N-6, 3N] * [3N,]).sum(axis=1) = [3N-6,]
	fcc = freq * freq * (nmo * nmo * mass).sum(axis=1) / 16.9744 / 10e4

	# get the temperature and number of samples for sampling
	t, s = nms_ts(np.count_nonzero(labels != 1))
	g = nmsgenerator(xyz, nmo, fcc, labels, t, 0)
	# generate 100 displaced geometries into new XYZ files
	filename, extension = os.path.splitext(arg)
	for i in range(100):
	energy, rxyz = g.get_random_structure(100)
	# print an xyz file
	xyzstr = '{}\n{}\n'.format(len(rxyz), energy)
	for l, x in zip(labels, rxyz):
	xyzstr += '{} {} {} {}\n'.format(element_symbols[l], *x)

	new_name = '{}.xyz'.format(i)
	with open(new_name, 'w') as f:
	f.write(xyzstr)
	import numpy as np
	import pybel
	import openbabel as ob
	from data import element_radii

	mDynetoMet = 1.0e-5 * 1.0e-3 * 1.0e10
	# boltzmann constant (in )
	Kb = 1.38064852e-23
	# meters to Angstrom
	MtoA = 1.0e10

	class nmsgenerator():
	# xyz = initial min structure
	# nmo = normal mode displacements
	# fcc = force constants
	# spc = atomic species list
	# T = temperature of displacement

	try:
	et = ob.OBElementTable
	except:
	et = {} # OpenBabel 3 doesn't use this class anymore

	def __init__(self,xyz,nmo,fcc,spc,T,minfc = 1.0E-3):
	self.xyz = xyz
	self.nmo = nmo
	self.labels = spc
	self.fcc = np.array([i if i > minfc else minfc for i in fcc])
	self.chg = np.array([self.__getCharge__(i) for i in spc])
	self.radii = [element_radii[i] for i in self.chg]
	self.T = T
	self.Na = xyz.shape[0]
	self.Nf = nmo.shape[0]
	self.e0 = self.__get_energy__(xyz)

	# returns atomic charge (number)
	def __getCharge__(self,t):
	if isinstance(t, (int, np.int32)):
	return t
	return self.et.GetAtomicNum(t)

	# Checks for small atomic distances
	def __check_atomic_distances__(self,rxyz):
	for i in range(0,self.Na):
	for j in range(i+1,self.Na):
	Rij = np.linalg.norm(rxyz[i]-rxyz[j])
	if Rij < 0.6 * (self.radii[i] * self.radii[j]):
	return True
	return False

	# get energy
	def __get_energy__(self, rxyz):
	xyzstr = '{}\n\n'.format(len(rxyz))
	for l, x in zip(self.labels, rxyz):
	xyzstr += '{} {} {} {}\n'.format(l, *x)
	mol = pybel.readstring('xyz', xyzstr)
	ff = pybel._forcefields['uff']
	if not ff.Setup(mol.OBMol):
	return None
	return ff.Energy()

	# Generate a structure
	def __genrandomstruct__(self):
	rdt = np.random.random(self.Nf+1)
	rdt[0] = 0.0
	norm = np.random.random(1)[0]
	rdt = norm*np.sort(rdt)
	rdt[self.Nf] = norm

	oxyz = self.xyz.copy()

	for i in range(self.Nf):
	# convert force constants from mDyne to atomic
	Ki = mDynetoMet * self.fcc[i]
	ci = rdt[i+1]-rdt[i]
	Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0
	Ri = Sn * MtoA * np.sqrt((3.0 * ci * float(self.Na) * Kb * self.T)/Ki)
	oxyz = oxyz + (Ri * self.nmo[i]).reshape(-1, 3)
	return oxyz

	# Call this to return a random structure
	def get_random_structure(self, emax=100):
	gs = True
	it = 0
	while gs:
	rxyz = self.__genrandomstruct__()
	#gs = self.__check_atomic_distances__(rxyz)
	e1 = self.__get_energy__(rxyz)
	if not e1:
	continue
	gs = e1 - self.e0 - emax > 0
	it += 1
	#if it % 100 == 0:
	# print "Failed after ", it
	return e1, rxyz