Skip to content

Instantly share code, notes, and snippets.

@richardjgowers
Created May 24, 2022 17:23
Show Gist options
  • Save richardjgowers/c40186e1fe18ebafb22958e18827dd2f to your computer and use it in GitHub Desktop.
Save richardjgowers/c40186e1fe18ebafb22958e18827dd2f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bd66ab90",
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"import json\n",
"\n",
"from openmm import app\n",
"\n",
"from rdkit import Chem"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6ebe7573",
"metadata": {},
"outputs": [],
"source": [
"def openmm_to_rdkit(pdbfile):\n",
" # convert openmm PDBFile to rdkit Molecule\n",
" # all bonds initially SINGLE, all charge initially neutral (to be later rectified)\n",
" rwmol = Chem.RWMol()\n",
" for i, atom in enumerate(pdbfile.topology.atoms()):\n",
" rwmol.AddAtom(Chem.Atom(atom.element.atomic_number))\n",
" rwmol.GetAtomWithIdx(i).SetProp('Name', atom.name)\n",
" # we're fully explicit\n",
" for atom in rwmol.GetAtoms():\n",
" atom.SetNoImplicit(True)\n",
" for bond in pdbfile.topology.bonds():\n",
" rwmol.AddBond(bond[0].index, bond[1].index, Chem.BondType.SINGLE)\n",
"\n",
" #conf = Chem.Conformer()\n",
" #for i, pos in enumerate(pdbfile.getPositions()):\n",
" # conf.SetAtomPosition(i, list(pos.value_in_unit(pos.unit)))\n",
" #rwmol.AddConformer(conf)\n",
" \n",
" return rwmol\n",
"\n",
"\n",
"def fuzzy_query(query):\n",
" \"\"\"return a copy of Query which is less specific:\n",
" - ignore aromaticity and hybridization of atoms (i.e. [#6] not C)\n",
" - ignore bond orders\n",
" - ignore charges\n",
" \"\"\"\n",
" generic = Chem.MolFromSmarts('**')\n",
" generic_bond = generic.GetBondWithIdx(0)\n",
" generic_mol = Chem.MolFromSmarts(''.join('[#{}]'.format(i+1) for i in range(112)))\n",
"\n",
" fuzzy = Chem.Mol(query)\n",
" for a in fuzzy.GetAtoms():\n",
" a.SetFormalCharge(0)\n",
" a.SetQuery(generic_mol.GetAtomWithIdx(a.GetAtomicNum() - 1)) # i.e. H looks up atom 0 in our generic mol\n",
" a.SetNoImplicit(True)\n",
" for b in fuzzy.GetBonds():\n",
" b.SetIsAromatic(False)\n",
" b.SetBondType(Chem.rdchem.BondType.SINGLE)\n",
" b.SetQuery(generic_bond)\n",
"\n",
" return fuzzy\n",
"\n",
"\n",
"def apply_pattern(rwmol, ref, match):\n",
" \"\"\"\n",
" rwmol -> thing to get modified\n",
" ref -> structure to copy from\n",
" match -> the indices mapping ref to match\n",
" \"\"\"\n",
" for atom_i, j in zip(ref.GetAtoms(), match):\n",
" atom_j = rwmol.GetAtomWithIdx(j)\n",
" # copy over chirality\n",
" if atom_i.GetChiralTag():\n",
" rwmol.GetAtomWithIdx(j).SetChiralTag(atom_i.GetChiralTag())\n",
" # copy over bond order & charge, only fancy atoms worth thinking about\n",
" if atom_i.GetAtomicNum() not in (7, 8, 16): # N, O, S\n",
" continue\n",
" atom_j.SetFormalCharge(atom_i.GetFormalCharge())\n",
"\n",
" # map double bonds onto our substructure\n",
" double_bonds = (b for b in ref.GetBonds()\n",
" if b.GetBondType() != Chem.rdchem.BondType.SINGLE)\n",
" for b in double_bonds:\n",
" x, y = match[b.GetBeginAtomIdx()], match[b.GetEndAtomIdx()]\n",
" b2 = rwmol.GetBondBetweenAtoms(x, y)\n",
" b2.SetBondType(b.GetBondType())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb44666d",
"metadata": {},
"outputs": [],
"source": [
"dataloc = '/home/richard/code/openff-toolkit/openff/toolkit/data/proteins/aa_residues_substructures_explicit_bond_orders_with_caps.json'\n",
"\n",
"with open(dataloc, 'r') as f:\n",
" data = json.load(f)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4aabe105",
"metadata": {},
"outputs": [],
"source": [
"pdbfile = app.PDBFile('./T4-protein.pdb')\n",
"rwmol = openmm_to_rdkit(pdbfile)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3f8e5582",
"metadata": {},
"outputs": [],
"source": [
"seen = set()\n",
"\n",
"for atom in rwmol.GetAtoms():\n",
" atom.SetProp('seen', 'False')\n",
" atom.SetProp('resname', 'None')\n",
"\n",
"for resname, patterns in data.items():\n",
" if resname in {'PEPTIDE_BOND', 'DISULFIDE'}:\n",
" continue\n",
" for pattern in patterns:\n",
" #pattern = clean_pattern(pattern)\n",
" q = fuzzy_query(Chem.MolFromSmarts(pattern))\n",
" for match in rwmol.GetSubstructMatches(q):\n",
" seen.update(match)\n",
" for m in match:\n",
" rwmol.GetAtomWithIdx(m).SetProp('seen', 'True')\n",
" rwmol.GetAtomWithIdx(m).SetProp('resname', resname)\n",
" #print(match)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "dd8f22a2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2465 2634\n",
"[1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 2085, 2086, 2087, 2088, 2089, 2090, 2091, 2092, 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, 2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333, 2334, 2335, 2336, 2337, 2338, 2343, 2344, 2578, 2579, 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2598, 2599]\n"
]
}
],
"source": [
"print(len(seen), rwmol.GetNumAtoms())\n",
"print(sorted(set(range(rwmol.GetNumAtoms())) - seen))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment