Created
December 1, 2018 10:22
-
-
Save cstein/003a4a663b2082317d76e589da2c8362 to your computer and use it in GitHub Desktop.
Creating QM, MM and QM/MM regions from .xyz files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Script to generate appropriate QM, MM and QM/MM files """ | |
from operator import itemgetter | |
import sys | |
import numpy | |
# shamelessly stolen from molecool | |
aa2au = 1.8897261249935897 # bohr / AA | |
# converts nuclear charge to atom label | |
Z2LABEL = { | |
1: 'H', 2: 'He', | |
3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', | |
11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', | |
19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', | |
35: 'Br', | |
53: 'I' | |
} | |
# converts an atomic label to a nuclear charge | |
LABEL2Z = {} | |
for key in Z2LABEL: | |
LABEL2Z[Z2LABEL[key]] = key | |
def write_dalton_mol_basis(coordinates, labels, fragments, **kwargs): | |
""" Writes a DALTON mol input file either to input or to screen | |
:param coordinates: the coordinates to write to file | |
:param labels: the atom labels to use | |
:param fragments: definition of fragments | |
:param kwargs: keyword arguments | |
:type coordinates: numpy.ndarray | |
:type labels: list | |
:type fragments: list | |
:type kwargs: dict | |
""" | |
outfile = kwargs.get('outfile', None) | |
basis = kwargs.get('basis', '6-31+G*') | |
charge = 0 | |
if fragments is None: | |
if len(coordinates) % 2 == 1: | |
charge = -1 | |
else: | |
if len(fragments[0]) % 2 == 1: | |
charge = -1 | |
charge = kwargs.get('charge', charge) | |
unit = kwargs.get('unit', 'AA').lower() | |
unit_factor = 1.0 | |
unit_label = ' Angstrom' | |
if unit == 'au' or unit == 'bohr': | |
unit_factor = aa2au | |
unit_label = '' | |
n_atom_types = 0 | |
HEADER = "BASIS\n{0:s}\n\n\nAtomTypes={1:d} Charge={2:d}{3:s} NoSymmetry\n" | |
ATOM_TYPE_HEADER = "Charge={0:.1f} Atoms={1:d}\n" | |
ATOM_LINE = "{0:1s}{1[0]:20.7f}{1[1]:14.7f}{1[2]:14.7f}\n" | |
# first we gather some data | |
atom_types = [] | |
atom_charges = [] | |
atom_counts = [] # counts atoms for every atom type | |
prev_label = "" | |
for i, label in enumerate(labels): | |
if label != prev_label: | |
atom_types.append(label) | |
atom_counts.append(0) | |
prev_label = label | |
atom_counts[len(atom_types)-1] += 1 | |
out_string = HEADER.format(basis, len(atom_types), charge, unit_label) | |
atom_lines = "" | |
prev_label = "" | |
atom_type = 0 | |
for c, label in zip(coordinates*unit_factor, labels): | |
if label != prev_label: | |
out_string += ATOM_TYPE_HEADER.format(LABEL2Z[label], atom_counts[atom_type]) | |
prev_label = label | |
atom_type += 1 | |
out_string += ATOM_LINE.format(label, c) | |
if outfile is not None: | |
with open(outfile, 'w') as outf: | |
outf.write(out_string) | |
else: | |
print out_string[:-1] | |
def write_pdb(fragments, coordinates, labels, **kwargs): | |
""" Writes an appropriate .pdb file based on fragments, coordinates and labels | |
:param coordinates: the coordinates to write to file | |
:param labels: the atom labels to use | |
:param fragments: definition of fragments | |
:param kwargs: keyword arguments | |
:type coordinates: numpy.ndarray | |
:type labels: list | |
:type fragments: list | |
:type kwargs: dict | |
""" | |
res_name = kwargs.get('resname', 'UNK') | |
resid = kwargs.get('resid', 1) | |
outfile = kwargs.get('outfile', None) | |
chain_name = kwargs.get('chainname', 'A') | |
PDB_LINE = "HETATM{0:5d}{1:>3s} {5:3s} {6:1s}{2:4d}{3[0]:12.3f}{3[1]:8.3f}{3[2]:8.3f} 1.00 0.00 {4:1s}" | |
if outfile is not None: | |
PDB_LINE += "\n" | |
fout = open(outfile, 'w') | |
cin = 1 | |
for i, fragment in enumerate(fragments, start=resid): | |
for c, l in zip(coordinates[fragment], labels[fragment]): | |
if outfile is not None: | |
fout.write(PDB_LINE.format(cin, l, i, c, l[0], res_name, chain_name)) | |
else: | |
print PDB_LINE.format(cin, l, i, c, l[0], res_name, chain_name) | |
cin += 1 | |
if outfile is not None: | |
fout.close() | |
def write_xyz(coordinates, labels, outfile=None): | |
""" Writes an .xyz file either to screen or to a file | |
:param coordinates: the coordinates to write to file | |
:param labels: the atom labels to use | |
:param outfile: the output filename. If not specified the value defaults to None | |
:type coordinates: numpy.ndarray | |
:type labels: list | |
:type outfile: string | |
""" | |
assert len(coordinates) == len(labels) | |
HEADER = "{0:d}\n".format(len(labels)) | |
XYZ_LINE = "{0:1s}{1[0]:20.9f}{1[1]:16.9f}{1[2]:16.9f}" | |
if outfile is not None: | |
XYZ_LINE += "\n" | |
HEADER += "\n" | |
fout = open(outfile, 'w') | |
fout.write(HEADER) | |
else: | |
print HEADER | |
for c, l in zip(coordinates, labels): | |
if outfile is not None: | |
fout.write(XYZ_LINE.format(l[0], c)) | |
else: | |
print XYZ_LINE.format(l[0], c) | |
if outfile is not None: | |
fout.close() | |
def load_mm_file(filename='ptchrg.xyz'): | |
""" Loads and parses the MM region data | |
:param filename: the file to load | |
:type filename: string | |
:return: fragment definition, labels and coordinates | |
:rtype: (numpy.ndarray, numpy.ndarray, numpy.ndarray) | |
""" | |
coordinates = [] | |
with open(filename, 'r') as infile: | |
for cin, inline in enumerate(infile, start=-2): | |
if cin < 0: | |
pass | |
else: | |
try: | |
data = map(float, inline.split()) | |
except ValueError: | |
data = ['X'] + map(float, inline.split()[1:]) | |
finally: | |
coordinates.append(data[1:]) | |
fragments = [[3*i,3*i+1,3*i+2] for i in range(len(coordinates)/3) ] | |
fragment_labels = [['O', 'H', 'H'] for i in range(len(coordinates)/3) ] | |
return numpy.array(fragments, dtype=int), numpy.ravel(numpy.char.asarray(fragment_labels)), numpy.array(coordinates) | |
def load_qm_chrom(filename='inpfile.xyz'): | |
""" Loads and parses the QM chromophore | |
:param filename: the file to load | |
:type filename: string | |
:return: labels, coordinates and molecular charge | |
:rtype: (list, numpy.ndarray, int) | |
""" | |
nat_chrom = 30 | |
labels = [] | |
coordinates = [] | |
charge = 0 | |
with open(filename, 'r') as infile: | |
for cin, inline in enumerate(infile, start=-2): | |
if cin < 0: | |
if cin == -2: | |
isneutral = int(inline) % 2 == 0 | |
if not isneutral: | |
nat_chrom = 29 | |
charge = -1 | |
elif cin < nat_chrom and cin >= 0: | |
tokens = inline.split() | |
labels.append(tokens[0]) | |
coordinates.append(map(float, tokens[1:])) | |
return labels, numpy.array(coordinates), charge | |
def min_fragment_distance_to_qm(qm_c, mm_c, fragment): | |
""" Computes the minimum distance from a fragment to the qm region | |
:param qm_c: coordinates of the QM region | |
:param mm_c: coordinates of the MM region | |
:param fragment: fragment indices | |
:type qm_c: numpy.ndarray | |
:type mm_c: numpy.ndarray | |
:type fragment: list | |
:returns: the minimum distance (in Angstrom) | |
:rtype: float | |
""" | |
R = 1.0e30 | |
for qm_cc in qm_c: | |
for mm_cc in mm_c[fragment]: | |
dr = qm_cc - mm_cc | |
dr2 = dr.dot(dr) | |
if dr2 < R: | |
R = dr2 | |
return numpy.sqrt(R) | |
def fragment_distances_to_qm(qm_c, mm_c, fragments): | |
""" Computes all fragment _minimum_ distances to QM region | |
sorts the indices according to distance | |
:param qm_c: coordinates of the QM region | |
:param mm_c: coordinates of the MM region | |
:param fragments: fragment indices as a list of lists | |
:type qm_c: numpy.ndarray | |
:type mm_c: numpy.ndarray | |
:type fragments: list() | |
:returns: a sorted list of fragment indices and distances | |
:rtype: (numpy.ndarray, int) | |
""" | |
Rs = [] | |
for i, fragment in enumerate(fragments): | |
Rs.append( (i, min_fragment_distance_to_qm(qm_c, mm_c, fragment)) ) | |
sorted_Rs = sorted(Rs, key=itemgetter(1), reverse=False) | |
sorted_Rs_T = numpy.array(sorted_Rs).T | |
return sorted_Rs_T[0].astype(int), sorted_Rs_T[1] | |
def analyse_for_hbonds(qm_c, qm_l, mm_c, mm_l, fragments): | |
""" Analyses for hydrogen bonds between QM region and MM region | |
:param qm_c: coordinates of the QM region | |
:param qm_l: labels of the QM region | |
:param mm_c: coordinates of the MM region | |
:param mm_l: labels of the MM region | |
:param fragments: fragment indices as a list of lists | |
:type qm_c: numpy.ndarray | |
:type qm_l: numpy.ndarray | |
:type mm_c: numpy.ndarray | |
:type mm_l: numpy.ndarray | |
:type fragments: list() | |
:returns: a list of fragments hydrogen bonding to the QM region | |
:rtype: numpy.ndarray | |
""" | |
hb_fragments = [] | |
for i, fragment in enumerate(fragments): | |
for mm_cc, mm_ll in zip(mm_c[fragment], mm_l[fragment]): | |
if i in hb_fragments: | |
break | |
for qm_cc, qm_ll in zip(qm_c, qm_l): | |
if qm_ll not in ["H", "O"]: | |
continue | |
if qm_ll == mm_ll[0]: | |
continue | |
dr = qm_cc - mm_cc | |
R2 = dr.dot(dr) | |
if R2 < 6.25: | |
hb_fragments.append(i) | |
break | |
return numpy.asarray(hb_fragments, dtype=int) | |
def build_qm_region(qm_c, qm_l, mm_c_in, mm_l_in, outfile=None): | |
""" Builds a QM region by concatenating approrpiate QM and MM atoms | |
:param qm_c: coordinates of the QM region | |
:param qm_l: labels of the QM region | |
:param mm_c: coordinates of the MM region | |
:param mm_l: labels of the MM region | |
:param outfile: output filename | |
:type qm_c: numpy.ndarray | |
:type qm_l: numpy.ndarray | |
:type mm_c: numpy.ndarray | |
:type mm_l: numpy.ndarray | |
:type outfile: string | |
:returns: coordinates, labels and fragment indices for the QM region | |
:rtype: (numpy.ndarray, numpy.ndarray, list) | |
""" | |
fragments = [range(0, len(qm_l))] | |
for i in range(len(mm_c_in)): | |
maxvalue = max(fragments[i]) | |
fragments.extend([range(maxvalue + 1, maxvalue + 4)]) | |
cc = numpy.reshape(mm_c_in.ravel(), (len(mm_c_in) * 3, 3)) | |
cc = numpy.concatenate((qm_c, cc)) | |
ll = numpy.reshape(mm_l_in.ravel(), (3 * len(mm_l_in))) | |
ll = numpy.concatenate((qm_l, ll)) | |
return cc, ll, fragments | |
if __name__ == '__main__': | |
# | |
# load files and compute some properties | |
# | |
qm_l, qm_c, qm_q = load_qm_chrom() | |
fragments, fragment_labels, mm_c = load_mm_file() | |
sorted_ids, sorted_Rs = fragment_distances_to_qm(qm_c, mm_c, fragments[:]) | |
# | |
# now we investigate hydrogen bonding | |
# | |
hbond_ids = analyse_for_hbonds(qm_c, qm_l, mm_c, fragment_labels, fragments[sorted_ids[0:50]]) | |
sorted_hbond_ids = sorted_ids[hbond_ids] | |
# sorted complement ids | |
complement_ids = [] | |
tmp = numpy.setdiff1d(sorted_ids, sorted_hbond_ids) | |
for id in sorted_ids: | |
if id in tmp: | |
complement_ids.append(id) | |
# dump what will _always_ be tip3p | |
write_pdb(fragments[sorted_ids[50:]], mm_c, fragment_labels, resname='T3P', resid=51, outfile='mm.pdb') | |
# now dump the MM region without the QM region | |
write_pdb(fragments[complement_ids[0:50-len(sorted_hbond_ids)]], mm_c, fragment_labels, resname='WAT', resid=len(sorted_hbond_ids)+1, outfile='pe.pdb') | |
# now create and dump the QM region | |
c, l, f = build_qm_region(qm_c, qm_l, mm_c[fragments[sorted_hbond_ids]], fragment_labels[fragments[sorted_hbond_ids]]) | |
write_pdb(f, c, l, resname="LIG", outfile='qm.pdb') | |
write_dalton_mol_basis(c, l, f, charge=qm_q, outfile='qm.mol') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment