Created
January 30, 2020 17:30
-
-
Save fabian-paul/abba9172d394dffb93624a710acbab16 to your computer and use it in GitHub Desktop.
Reorder small molecule atoms in PDB file to match the order in template PDB file. Uses RDKit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import rdkit.Chem | |
def reorder_atoms(mol_pdb_fname, template_pdb_fname, output_pdb_fname): | |
from rdkit.Chem import rdmolfiles | |
mol_to_transform = rdkit.Chem.rdmolfiles.MolFromPDBFile(mol_pdb_fname, removeHs=False) | |
transform_order = list(rdmolfiles.CanonicalRankAtoms(mol_to_transform)) | |
mol_template = rdkit.Chem.rdmolfiles.MolFromPDBFile(template_pdb_fname, removeHs=False) | |
template_order = list(rdmolfiles.CanonicalRankAtoms(mol_template)) | |
if len(template_order) != len(transform_order): | |
raise RuntimeError('Number of atoms differs between template and molecule to transform.') | |
i_transform_order = [int(i) for i in np.argsort(transform_order)] | |
i_template_order = [int(i) for i in np.argsort(template_order)] | |
N_atoms = len(template_order) | |
pos_to_transform = mol_to_transform.GetConformers()[0].GetPositions() | |
lines = [None]*N_atoms | |
for _, (otr, ote) in enumerate(zip(i_transform_order, i_template_order)): | |
#print(mol_to_transform.GetAtoms()[otr].GetPDBResidueInfo().GetName(), | |
# mol_template.GetAtoms()[ote].GetPDBResidueInfo().GetName()) | |
pdb_entry_template = mol_template.GetAtoms()[ote].GetPDBResidueInfo() | |
lines[ote] = '{ATOM:<6}{serial_number:>5} {atom_name:<4}{alt_loc_indicator:<1}{res_name:<3} {chain_id:<1}{res_seq_number:>4}{insert_code:<1} {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{temp_factor:6.2f}'.format( | |
ATOM='HETATM', | |
serial_number=pdb_entry_template.GetSerialNumber(), | |
atom_name=pdb_entry_template.GetName(), | |
alt_loc_indicator=' ', | |
res_name=pdb_entry_template.GetResidueName(), | |
chain_id=pdb_entry_template.GetChainId(), | |
res_seq_number=pdb_entry_template.GetResidueNumber(), | |
insert_code=' ', | |
x= pos_to_transform[otr, 0], | |
y= pos_to_transform[otr, 1], | |
z= pos_to_transform[otr, 2], | |
occupancy=1.0, | |
temp_factor=0.0 | |
) | |
with open(output_pdb_fname, 'w') as f: | |
for line in lines: | |
if line is not None: | |
print(line, file=f) | |
def validate(template_pdb_fname, output_pdb_fname): | |
import rdkit.Chem.rdPartialCharges | |
mol_template = rdkit.Chem.rdmolfiles.MolFromPDBFile(template_pdb_fname, removeHs=False) | |
finished_molecule = rdkit.Chem.rdmolfiles.MolFromPDBFile(output_pdb_fname, removeHs=False) | |
N_atoms = finished_molecule.GetNumAtoms() | |
for i in range(N_atoms): | |
assert finished_molecule.GetAtoms()[i].GetAtomicNum() == mol_template.GetAtoms()[i].GetAtomicNum() | |
assert finished_molecule.GetAtoms()[i].GetDegree() == mol_template.GetAtoms()[i].GetDegree() | |
assert finished_molecule.GetAtoms()[i].GetTotalDegree() == mol_template.GetAtoms()[i].GetTotalDegree(), i | |
assert finished_molecule.GetAtoms()[i].GetHybridization() == mol_template.GetAtoms()[i].GetHybridization(), i | |
assert finished_molecule.GetAtoms()[i].GetFormalCharge() == mol_template.GetAtoms()[i].GetFormalCharge(), i | |
assert finished_molecule.GetAtoms()[i].GetTotalValence() == mol_template.GetAtoms()[i].GetTotalValence(), i | |
rdkit.Chem.rdPartialCharges.ComputeGasteigerCharges(finished_molecule, throwOnParamFailure=True) | |
charges_finished = np.array([float(finished_molecule.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) for i in range(N_atoms)]) | |
rdkit.Chem.rdPartialCharges.ComputeGasteigerCharges(mol_template, throwOnParamFailure=True) | |
charges_template = np.array([float(mol_template.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) for i in range(N_atoms)]) | |
assert np.allclose(charges_finished, charges_template) | |
print('Validation OK') | |
if __name__ == '__main__': | |
import argparse | |
parser = argparse.ArgumentParser(description='reorder ligand atoms according to template', | |
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
parser.add_argument('--input', metavar='pdb file', default='in.pdb', | |
help='(in) file name of the molecule to reoreder') | |
parser.add_argument('--template', metavar='pdb file', default='template.pdb', | |
help='(in) file name of the molecule template with the desired order') | |
parser.add_argument('--output', metavar='pdb file', default='out.pdb', | |
help='(out) file name for the reordered molecule') | |
parser.add_argument('--novalidation', action='store_true', | |
help='Skip validation of output.') | |
args = parser.parse_args() | |
reorder_atoms(args.input, args.template, args.output) | |
if not args.novalidation: | |
validate(args.template, args.output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment