Skip to content

Instantly share code, notes, and snippets.

@ptosco
Last active December 16, 2022 06:45
Show Gist options
  • Save ptosco/4844d3635cf14d11e5e14381993915c1 to your computer and use it in GitHub Desktop.
Save ptosco/4844d3635cf14d11e5e14381993915c1 to your computer and use it in GitHub Desktop.
XYZSmilesToMol
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem import AllChem\n",
"from io import StringIO\n",
"from rdkit.Chem.Draw import IPythonConsole"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"bilastine_xyz = \"\"\"\\\n",
"71\n",
"Untitled\n",
"O 9.63690 -1.58160 -0.45380\n",
"C 9.23030 -0.38070 -0.26330\n",
"O 9.53320 0.63440 -0.98540\n",
"C 8.29670 -0.14110 0.90970\n",
"C 6.86630 -0.21600 0.37040\n",
"C 5.99890 0.85000 0.48030\n",
"C 4.72010 0.77250 -0.01980\n",
"C 4.28340 -0.37540 -0.64060\n",
"C 5.14050 -1.44740 -0.75190\n",
"C 6.42000 -1.36480 -0.25100\n",
"C 2.86960 -0.42590 -1.19810\n",
"C 1.82570 -1.12160 -0.29110\n",
"N 0.45790 -0.49150 -0.40460\n",
"C 0.30510 0.71020 0.49470\n",
"C -1.05500 1.39670 0.26480\n",
"C -2.21490 0.40470 0.49380\n",
"C -3.55160 1.11540 0.33980\n",
"N -3.65970 2.33840 -0.15600\n",
"C -4.98220 2.70660 -0.12220\n",
"C -5.65890 3.86780 -0.49940\n",
"C -7.01040 3.88820 -0.30660\n",
"C -7.68670 2.79020 0.24420\n",
"C -7.02040 1.65330 0.61150\n",
"C -5.64490 1.63960 0.41220\n",
"N -4.77720 0.62490 0.70390\n",
"C -5.09690 -0.69360 1.28050\n",
"C -5.75440 -1.65060 0.26870\n",
"O -7.15700 -1.35080 0.14090\n",
"C -7.81570 -2.13580 -0.87190\n",
"C -8.06640 -3.58880 -0.43050\n",
"C -2.03470 -0.80200 -0.45270\n",
"C -0.66740 -1.47780 -0.21060\n",
"C 8.66740 1.21300 1.55470\n",
"C 8.53470 -1.19680 2.00900\n",
"H 6.30500 1.69120 0.92770\n",
"H 4.10530 1.55610 0.06540\n",
"H 4.83380 -2.28740 -1.19970\n",
"H 7.03590 -2.14790 -0.33940\n",
"H 2.89660 -0.97930 -2.26320\n",
"H 2.52610 0.70490 -1.40590\n",
"H 2.17690 -1.05920 0.85540\n",
"H 1.78600 -2.28270 -0.59480\n",
"H 1.17950 1.50720 0.29720\n",
"H 0.36390 0.34570 1.63660\n",
"H -1.09870 1.80510 -0.86260\n",
"H -1.16970 2.31860 1.02460\n",
"H -2.11820 0.02170 1.62630\n",
"H -5.17550 4.65060 -0.89160\n",
"H -7.52890 4.70380 -0.56290\n",
"H -8.67740 2.83950 0.37110\n",
"H -7.50010 0.86900 1.00460\n",
"H -5.85610 -0.54200 2.19760\n",
"H -4.09990 -1.20120 1.71070\n",
"H -5.62210 -2.77180 0.67200\n",
"H -5.20930 -1.53560 -0.79380\n",
"H -7.20750 -2.11670 -1.90630\n",
"H -8.89010 -1.63610 -1.06100\n",
"H -8.83490 -4.10350 -1.19490\n",
"H -7.04250 -4.21360 -0.43540\n",
"H -8.54010 -3.59310 0.67170\n",
"H -2.89380 -1.61770 -0.27120\n",
"H -2.08970 -0.41960 -1.58900\n",
"H -0.63170 -1.89100 0.91540\n",
"H -0.56400 -2.40180 -0.97070\n",
"H 9.81390 1.19990 1.90810\n",
"H 8.51020 2.09510 0.75650\n",
"H 7.96030 1.41240 2.50380\n",
"H 9.68450 -1.17670 2.35180\n",
"H 8.26720 -2.28540 1.58040\n",
"H 7.83300 -0.96160 2.95390\n",
"H 0.37450 -0.19410 -1.26220\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"bilastine_smi = 'CCOCCn1c(C2CC[NH+](CCc3ccc(C(C)(C)C(=O)[O-])cc3)CC2)nc2ccccc21'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the XYZ to a PDB block so we can use the proximity bond generator of the PDB parser"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def xyz_to_pdb_block(xyz_block):\n",
" pdb_block = ''\n",
" n = 0\n",
" n_atoms = 0\n",
" with StringIO(xyz_block) as hnd:\n",
" while True:\n",
" line = hnd.readline()\n",
" if (not line):\n",
" raise RuntimeError('XYZ block ended prematurely')\n",
" n += 1\n",
" if (n == 1):\n",
" try:\n",
" n_atoms = int(line.strip())\n",
" except Exception as e:\n",
" raise type(e)('Could not parse number of atoms on line {0:d}'.format(n)) from e\n",
" elif (n > 2):\n",
" try:\n",
" elem, x, y, z = line.strip().split()\n",
" except Exception as e:\n",
" raise type(e)('Could not parse coordinate line {0:d}'.format(n)) from e\n",
" pdb_block += 'ATOM {0:5d} {1:>2s} UNL 1 {2:8.3f}{3:8.3f}{4:8.3f} 1.00 0.00\\n'.format(\n",
" n - 2, elem, float(x), float(y), float(z))\n",
" if (n == n_atoms + 2):\n",
" break\n",
" return pdb_block "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get a molecule connected by single bonds"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# The generated block holds ATOM records only, so the connectivity has to come\n",
"# from the PDB parser; removeHs=False keeps the explicit hydrogens.\n",
"xyz_mol = Chem.MolFromPDBBlock(xyz_to_pdb_block(bilastine_xyz), removeHs=False)\n",
"# MolFromPDBBlock signals failure by returning None; stop here with a clear\n",
"# message instead of an AttributeError in a later cell.\n",
"if xyz_mol is None:\n",
"    raise ValueError('RDKit could not build a molecule from the converted XYZ block')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
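"Reorder atoms so that all hydrogens come after the heavy atoms. The cells below rely on this: once the hydrogens are removed the heavy atoms keep their indices, and the hydrogens that are appended back afterwards are expected to end up at the indices they have here."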
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Permutation that lists heavy atoms first and hydrogens (plus any atoms with\n",
"# atomic number 0) last. sorted() is stable, so atoms keep their relative order\n",
"# within each group, and an empty molecule simply yields an empty tuple.\n",
"order_by_atomic_num = tuple(sorted(\n",
"    range(xyz_mol.GetNumAtoms()),\n",
"    key=lambda idx: xyz_mol.GetAtomWithIdx(idx).GetAtomicNum() <= 1))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"xyz_mol = AllChem.RenumberAtoms(xyz_mol, order_by_atomic_num)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f88a22a6710>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xyz_mol"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Template molecule built from the SMILES string.\n",
"smiles_mol = Chem.MolFromSmiles(bilastine_smi)\n",
"# MolFromSmiles returns None for a SMILES it cannot parse; fail early with a\n",
"# clear message instead of passing None on as the template.\n",
"if smiles_mol is None:\n",
"    raise ValueError('RDKit could not parse the SMILES: {0}'.format(bilastine_smi))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f88a0a2a8a0>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"smiles_mol"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get a molecule with bond orders taken from SMILES and no hydrogens"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"xyz_mol_no_h = AllChem.AssignBondOrdersFromTemplate(smiles_mol, Chem.RemoveHs(xyz_mol))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f88a0a2c960>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xyz_mol_no_h"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"xyz_mol_bo = Chem.RWMol(xyz_mol_no_h)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Put back hydrogens in place and connect them to their parent heavy atom with a single bond"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Append a copy of every hydrogen of xyz_mol to the editable molecule.\n",
"# The size check keeps the cell re-runnable: once the hydrogens are in,\n",
"# running it again does not append them a second time.\n",
"if xyz_mol_bo.GetNumAtoms() < xyz_mol.GetNumAtoms():\n",
"    for a in xyz_mol.GetAtoms():\n",
"        if a.GetAtomicNum() == 1:\n",
"            # AddAtom returns the index of the newly added atom.\n",
"            new_idx = xyz_mol_bo.AddAtom(a)\n",
"            # Copy the coordinates right away so that a hydrogen without any\n",
"            # bond in xyz_mol also ends up at its original position.\n",
"            xyz_mol_bo.GetConformer().SetAtomPosition(\n",
"                new_idx, xyz_mol.GetConformer().GetAtomPosition(a.GetIdx()))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"xyz_mol_conf = xyz_mol.GetConformer()\n",
"xyz_mol_bo_conf = xyz_mol_bo.GetConformer()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Recreate in xyz_mol_bo every bond of xyz_mol that involves a hydrogen.\n",
"# Atom indices taken from xyz_mol are used directly on xyz_mol_bo, which\n",
"# assumes both molecules number their atoms identically.\n",
"for b in xyz_mol.GetBonds():\n",
"    if b.GetBeginAtom().GetAtomicNum() == 1:\n",
"        hydro = b.GetBeginAtom()\n",
"    elif b.GetEndAtom().GetAtomicNum() == 1:\n",
"        hydro = b.GetEndAtom()\n",
"    else:\n",
"        # Bonds without a hydrogen are skipped.\n",
"        continue\n",
"    heavy_idx = b.GetOtherAtom(hydro).GetIdx()\n",
"    hydro_idx = hydro.GetIdx()\n",
"    heavy = xyz_mol_bo.GetAtomWithIdx(heavy_idx)\n",
"    # The hydrogens are explicit atoms now, so the parent atom must not carry\n",
"    # an additional implicit or explicit hydrogen count.\n",
"    heavy.SetNoImplicit(True)\n",
"    heavy.SetNumExplicitHs(0)\n",
"    # Only add the bond when it is missing, so the cell can be re-run without\n",
"    # AddBond failing on an already existing bond.\n",
"    if xyz_mol_bo.GetBondBetweenAtoms(heavy_idx, hydro_idx) is None:\n",
"        xyz_mol_bo.AddBond(heavy_idx, hydro_idx, Chem.BondType.SINGLE)\n",
"    xyz_mol_bo_conf.SetAtomPosition(hydro_idx, xyz_mol_conf.GetAtomPosition(hydro_idx))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is the molecule with hydrogens and bond orders"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.RWMol at 0x7f88a0a2c998>"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xyz_mol_bo"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment