Skip to content

Instantly share code, notes, and snippets.

@cstein
Created December 1, 2018 10:22
Show Gist options
  • Save cstein/003a4a663b2082317d76e589da2c8362 to your computer and use it in GitHub Desktop.
Save cstein/003a4a663b2082317d76e589da2c8362 to your computer and use it in GitHub Desktop.
Creating QM, MM and QM/MM regions from .xyz files.
""" Script to generate appropriate QM, MM and QM/MM files """
from operator import itemgetter
import sys
import numpy
# shamelessly stolen from molecool
aa2au = 1.8897261249935897 # bohr / AA
# converts nuclear charge to atom label
Z2LABEL = {
1: 'H', 2: 'He',
3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne',
11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar',
19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe',
35: 'Br',
53: 'I'
}
# converts an atomic label to a nuclear charge
LABEL2Z = {}
for key in Z2LABEL:
LABEL2Z[Z2LABEL[key]] = key
def write_dalton_mol_basis(coordinates, labels, fragments, **kwargs):
""" Writes a DALTON mol input file either to input or to screen
:param coordinates: the coordinates to write to file
:param labels: the atom labels to use
:param fragments: definition of fragments
:param kwargs: keyword arguments
:type coordinates: numpy.ndarray
:type labels: list
:type fragments: list
:type kwargs: dict
"""
outfile = kwargs.get('outfile', None)
basis = kwargs.get('basis', '6-31+G*')
charge = 0
if fragments is None:
if len(coordinates) % 2 == 1:
charge = -1
else:
if len(fragments[0]) % 2 == 1:
charge = -1
charge = kwargs.get('charge', charge)
unit = kwargs.get('unit', 'AA').lower()
unit_factor = 1.0
unit_label = ' Angstrom'
if unit == 'au' or unit == 'bohr':
unit_factor = aa2au
unit_label = ''
n_atom_types = 0
HEADER = "BASIS\n{0:s}\n\n\nAtomTypes={1:d} Charge={2:d}{3:s} NoSymmetry\n"
ATOM_TYPE_HEADER = "Charge={0:.1f} Atoms={1:d}\n"
ATOM_LINE = "{0:1s}{1[0]:20.7f}{1[1]:14.7f}{1[2]:14.7f}\n"
# first we gather some data
atom_types = []
atom_charges = []
atom_counts = [] # counts atoms for every atom type
prev_label = ""
for i, label in enumerate(labels):
if label != prev_label:
atom_types.append(label)
atom_counts.append(0)
prev_label = label
atom_counts[len(atom_types)-1] += 1
out_string = HEADER.format(basis, len(atom_types), charge, unit_label)
atom_lines = ""
prev_label = ""
atom_type = 0
for c, label in zip(coordinates*unit_factor, labels):
if label != prev_label:
out_string += ATOM_TYPE_HEADER.format(LABEL2Z[label], atom_counts[atom_type])
prev_label = label
atom_type += 1
out_string += ATOM_LINE.format(label, c)
if outfile is not None:
with open(outfile, 'w') as outf:
outf.write(out_string)
else:
print out_string[:-1]
def write_pdb(fragments, coordinates, labels, **kwargs):
""" Writes an appropriate .pdb file based on fragments, coordinates and labels
:param coordinates: the coordinates to write to file
:param labels: the atom labels to use
:param fragments: definition of fragments
:param kwargs: keyword arguments
:type coordinates: numpy.ndarray
:type labels: list
:type fragments: list
:type kwargs: dict
"""
res_name = kwargs.get('resname', 'UNK')
resid = kwargs.get('resid', 1)
outfile = kwargs.get('outfile', None)
chain_name = kwargs.get('chainname', 'A')
PDB_LINE = "HETATM{0:5d}{1:>3s} {5:3s} {6:1s}{2:4d}{3[0]:12.3f}{3[1]:8.3f}{3[2]:8.3f} 1.00 0.00 {4:1s}"
if outfile is not None:
PDB_LINE += "\n"
fout = open(outfile, 'w')
cin = 1
for i, fragment in enumerate(fragments, start=resid):
for c, l in zip(coordinates[fragment], labels[fragment]):
if outfile is not None:
fout.write(PDB_LINE.format(cin, l, i, c, l[0], res_name, chain_name))
else:
print PDB_LINE.format(cin, l, i, c, l[0], res_name, chain_name)
cin += 1
if outfile is not None:
fout.close()
def write_xyz(coordinates, labels, outfile=None):
""" Writes an .xyz file either to screen or to a file
:param coordinates: the coordinates to write to file
:param labels: the atom labels to use
:param outfile: the output filename. If not specified the value defaults to None
:type coordinates: numpy.ndarray
:type labels: list
:type outfile: string
"""
assert len(coordinates) == len(labels)
HEADER = "{0:d}\n".format(len(labels))
XYZ_LINE = "{0:1s}{1[0]:20.9f}{1[1]:16.9f}{1[2]:16.9f}"
if outfile is not None:
XYZ_LINE += "\n"
HEADER += "\n"
fout = open(outfile, 'w')
fout.write(HEADER)
else:
print HEADER
for c, l in zip(coordinates, labels):
if outfile is not None:
fout.write(XYZ_LINE.format(l[0], c))
else:
print XYZ_LINE.format(l[0], c)
if outfile is not None:
fout.close()
def load_mm_file(filename='ptchrg.xyz'):
""" Loads and parses the MM region data
:param filename: the file to load
:type filename: string
:return: fragment definition, labels and coordinates
:rtype: (numpy.ndarray, numpy.ndarray, numpy.ndarray)
"""
coordinates = []
with open(filename, 'r') as infile:
for cin, inline in enumerate(infile, start=-2):
if cin < 0:
pass
else:
try:
data = map(float, inline.split())
except ValueError:
data = ['X'] + map(float, inline.split()[1:])
finally:
coordinates.append(data[1:])
fragments = [[3*i,3*i+1,3*i+2] for i in range(len(coordinates)/3) ]
fragment_labels = [['O', 'H', 'H'] for i in range(len(coordinates)/3) ]
return numpy.array(fragments, dtype=int), numpy.ravel(numpy.char.asarray(fragment_labels)), numpy.array(coordinates)
def load_qm_chrom(filename='inpfile.xyz'):
""" Loads and parses the QM chromophore
:param filename: the file to load
:type filename: string
:return: labels, coordinates and molecular charge
:rtype: (list, numpy.ndarray, int)
"""
nat_chrom = 30
labels = []
coordinates = []
charge = 0
with open(filename, 'r') as infile:
for cin, inline in enumerate(infile, start=-2):
if cin < 0:
if cin == -2:
isneutral = int(inline) % 2 == 0
if not isneutral:
nat_chrom = 29
charge = -1
elif cin < nat_chrom and cin >= 0:
tokens = inline.split()
labels.append(tokens[0])
coordinates.append(map(float, tokens[1:]))
return labels, numpy.array(coordinates), charge
def min_fragment_distance_to_qm(qm_c, mm_c, fragment):
""" Computes the minimum distance from a fragment to the qm region
:param qm_c: coordinates of the QM region
:param mm_c: coordinates of the MM region
:param fragment: fragment indices
:type qm_c: numpy.ndarray
:type mm_c: numpy.ndarray
:type fragment: list
:returns: the minimum distance (in Angstrom)
:rtype: float
"""
R = 1.0e30
for qm_cc in qm_c:
for mm_cc in mm_c[fragment]:
dr = qm_cc - mm_cc
dr2 = dr.dot(dr)
if dr2 < R:
R = dr2
return numpy.sqrt(R)
def fragment_distances_to_qm(qm_c, mm_c, fragments):
""" Computes all fragment _minimum_ distances to QM region
sorts the indices according to distance
:param qm_c: coordinates of the QM region
:param mm_c: coordinates of the MM region
:param fragments: fragment indices as a list of lists
:type qm_c: numpy.ndarray
:type mm_c: numpy.ndarray
:type fragments: list()
:returns: a sorted list of fragment indices and distances
:rtype: (numpy.ndarray, int)
"""
Rs = []
for i, fragment in enumerate(fragments):
Rs.append( (i, min_fragment_distance_to_qm(qm_c, mm_c, fragment)) )
sorted_Rs = sorted(Rs, key=itemgetter(1), reverse=False)
sorted_Rs_T = numpy.array(sorted_Rs).T
return sorted_Rs_T[0].astype(int), sorted_Rs_T[1]
def analyse_for_hbonds(qm_c, qm_l, mm_c, mm_l, fragments):
""" Analyses for hydrogen bonds between QM region and MM region
:param qm_c: coordinates of the QM region
:param qm_l: labels of the QM region
:param mm_c: coordinates of the MM region
:param mm_l: labels of the MM region
:param fragments: fragment indices as a list of lists
:type qm_c: numpy.ndarray
:type qm_l: numpy.ndarray
:type mm_c: numpy.ndarray
:type mm_l: numpy.ndarray
:type fragments: list()
:returns: a list of fragments hydrogen bonding to the QM region
:rtype: numpy.ndarray
"""
hb_fragments = []
for i, fragment in enumerate(fragments):
for mm_cc, mm_ll in zip(mm_c[fragment], mm_l[fragment]):
if i in hb_fragments:
break
for qm_cc, qm_ll in zip(qm_c, qm_l):
if qm_ll not in ["H", "O"]:
continue
if qm_ll == mm_ll[0]:
continue
dr = qm_cc - mm_cc
R2 = dr.dot(dr)
if R2 < 6.25:
hb_fragments.append(i)
break
return numpy.asarray(hb_fragments, dtype=int)
def build_qm_region(qm_c, qm_l, mm_c_in, mm_l_in, outfile=None):
""" Builds a QM region by concatenating approrpiate QM and MM atoms
:param qm_c: coordinates of the QM region
:param qm_l: labels of the QM region
:param mm_c: coordinates of the MM region
:param mm_l: labels of the MM region
:param outfile: output filename
:type qm_c: numpy.ndarray
:type qm_l: numpy.ndarray
:type mm_c: numpy.ndarray
:type mm_l: numpy.ndarray
:type outfile: string
:returns: coordinates, labels and fragment indices for the QM region
:rtype: (numpy.ndarray, numpy.ndarray, list)
"""
fragments = [range(0, len(qm_l))]
for i in range(len(mm_c_in)):
maxvalue = max(fragments[i])
fragments.extend([range(maxvalue + 1, maxvalue + 4)])
cc = numpy.reshape(mm_c_in.ravel(), (len(mm_c_in) * 3, 3))
cc = numpy.concatenate((qm_c, cc))
ll = numpy.reshape(mm_l_in.ravel(), (3 * len(mm_l_in)))
ll = numpy.concatenate((qm_l, ll))
return cc, ll, fragments
if __name__ == '__main__':
#
# load files and compute some properties
#
qm_l, qm_c, qm_q = load_qm_chrom()
fragments, fragment_labels, mm_c = load_mm_file()
sorted_ids, sorted_Rs = fragment_distances_to_qm(qm_c, mm_c, fragments[:])
#
# now we investigate hydrogen bonding
#
hbond_ids = analyse_for_hbonds(qm_c, qm_l, mm_c, fragment_labels, fragments[sorted_ids[0:50]])
sorted_hbond_ids = sorted_ids[hbond_ids]
# sorted complement ids
complement_ids = []
tmp = numpy.setdiff1d(sorted_ids, sorted_hbond_ids)
for id in sorted_ids:
if id in tmp:
complement_ids.append(id)
# dump what will _always_ be tip3p
write_pdb(fragments[sorted_ids[50:]], mm_c, fragment_labels, resname='T3P', resid=51, outfile='mm.pdb')
# now dump the MM region without the QM region
write_pdb(fragments[complement_ids[0:50-len(sorted_hbond_ids)]], mm_c, fragment_labels, resname='WAT', resid=len(sorted_hbond_ids)+1, outfile='pe.pdb')
# now create and dump the QM region
c, l, f = build_qm_region(qm_c, qm_l, mm_c[fragments[sorted_hbond_ids]], fragment_labels[fragments[sorted_hbond_ids]])
write_pdb(f, c, l, resname="LIG", outfile='qm.pdb')
write_dalton_mol_basis(c, l, f, charge=qm_q, outfile='qm.mol')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment