Skip to content

Instantly share code, notes, and snippets.

@vfscalfani
Created February 8, 2020 21:46
Show Gist options
  • Save vfscalfani/ee95147c843a00304cbb39ca86a91f19 to your computer and use it in GitHub Desktop.
Save vfscalfani/ee95147c843a00304cbb39ca86a91f19 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding PubChem nonstandard dative bonds to a molfile"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# See the PUBCHEM_NONSTANDARDBOND tag in the PubChem substance SD tag documentation:\n",
"# https://pubchem.ncbi.nlm.nih.gov/upload/html/tags_substance.html\n",
"\n",
"from rdkit import Chem\n",
"from rdkit.Chem import Draw"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Helper that labels atoms with their molfile atom-block index so a drawing shows it (see RDKit Cookbook)\n",
"def molblock_with_atom_index(mol):\n",
"    \"\"\"Label every atom of ``mol`` with its 1-based atom-block index.\n",
"\n",
"    Each atom's atom-map number is set to ``atom.GetIdx() + 1`` so that the\n",
"    label lines up with the atom's line position in the molfile atom block.\n",
"    The molecule is modified in place and the same object is returned.\n",
"    \"\"\"\n",
"    for atom in mol.GetAtoms():\n",
"        # adding 1 to Idx to align with the molfile atom block\n",
"        atom.SetAtomMapNum(atom.GetIdx() + 1)\n",
"    return mol"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAGQCAIAAAAP3aGbAAAO0ElEQVR4nO3d23bixhpGUdgj7//K7AvShNYZHf+vNOdVQmMjsLVcJQrp+Xq9HgAJ/nf1BgAsJVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEOOfqzeAnz2fz89/v16vz42f/x67ZeKene+24yPOfnNYTrDCdKIwVqWxr53+hmPF2fKI098cfiJYSfr7/E8JeN95MFudR5l4CNHhQo5htW+2UB2vL+c8IixkhNWOsUwcNyaafcTX6yVe7Eiw2jF2KP3aR/w+hnX09tA8U0IghmAl6c+wjh62bHxEoyr2ZUoYplOQJcenptcTfH/DwbttecTZbw4/sTQGiGGERZcxEWUZYQExHHSvruZx65pbRfMEq7Syn7+zIpRLCBYQQ7DqKju8ejPI4nyCBcQQrKKKD6/eDLI4mWCxiWZxJsGqKGJ4BecTLLYyyOI0glVO4vBKsziHYAExBKuWxOHVm0EWJxAsIIZgFZI7vHozyOJo2XsIFLHi4tjT5x0b+9f0v2obOYEfbLXi4tjT18Qe/Fej14cpIWy07nLc0/cZO7n+ncdWb4IFl3k+n4OTxO87iNQ3U0I40PQ87jPX+1xkqPO1atUhWHCg1ZfjVqtBpoRwgelyqdUYwYJNzr8c952ZEl5mxcqd2dur/WXe8hwrXx6xs8ErLo49eE3ssQPwBV+BqwjWNVas3Pm+88Ibr7XxOU4E7lqD2zO2EGHilol/nXi+pV6K85kSXmDdyp3vOw/+rpf6Vd74HMuqVs+7EawYBcdQu/ueJT3/qBOIUhtzT6aE5YyFqaVdZfY5FpwSFtmMmxOsclav3AkS9xzVqghTQpijVmUI1gXusHKnkef4fD6ez4dalWFKeI0VK3fiZiVbnuPgMqWzSVU9YfvADZXtVNkN24dalWRKWF3N8w6rFZcQrEIKhulXDTyFx+OhVmUJViFjg6lqg6yfPt4IOxIsIIZg1VJ/kGV4xYUEK0aFZqkS1xKsciqE6VdCxjkEK8m1LTMZ5HJWulf0DpMKHOU7+mMv8uc+33cYvJETCVaYq1rWzvCqsyh0cI3o942f/x68kXOZEhY1Mfu7JBBt1urxeLxej/7rnPWk7kSw4I9+uYykihGsuoq/XZg3vJrVeTpqVY9gwRC1KkmwSis7yGpwePVNraoSrOpqdqHmVs3rH2LXpihN/52EQWPrsL5XMHz7Xs3Q/ypOJFhADFNCIIZgATEEi7sq+fYr0wQLiCFYQAxna8iz5Oqk/YWdW65LOv2InaWtMe87v9dkpWwtj8dDsOJ0SjQRprGv+nWR+uwjdv51+XeGXwlWkulYfN+yfNQzds2L5Y84sXmwL8FqxMJY/BSgXR4RduSgeyPW1WrHR4zs1+DZ+yhMsO4iMijwN8G6BbUaY3yVRbCS9E+PdfS7cgsfURA5h4PuYToFWb5YYcnqrX0fMYIrqmXxo+LuBCuIKSEQQ7CAGIIVo8inXopsxo7KXumDPsECYghWhjoHho1HuJBgQc4pcW5PsIAYghWgznzwzayQqwgWEEOwgBiCVV21+SBcSLBYw2EsLuFsDfCv8y9HxK8ECx6PKy5HxAqmhKVV3gdamhUuvBxR2Z/FfQgWTGkmym0wJYQp06OqloaZEYyw6qo8H3yzuz7+TB
WL/6SaIVhADMGqK+KPdsRGzlp9OSIDzJM5hsVNdWbc6y4O9P1VbbS7uOpHSeAI9Y8PMsiUkNs5p1Zmi0cQLO7ltLHVe7YoW/syMOZGLpkJvps18UGfieNlgxs8eNTsJofSHHTnFgarcY7B4kx/bnHC4KcX7/ORRsGqJfGEAZ1Zz+A2XL6FdfbhJZ9bnDB45zrP7miCVUjuCQNmN/vCLSxVq9WWzxYbeLITBKuKhScMeCwb0ZSlVtPGDtKr1ZtgBVjyi3jhMZox/QHj49wtTNyBfx1TT9/YHsEKsOQXsX/w9UyDh94G54mnbeGFO/Chab5zrR6CdaGCY6LVZt/zOv9pXvjCfs/cB1+KLe8S3pxgnW3398uuWltkH5u28BOI0/f86Lxx0f/aX79hKME6z/SQKuUPb0sDwwstXJ3Qn1/ffFlDxV3izpYsgL5wHdb0wuvONgwOCga33C8hCwkWSx034rvJx0rYTrCYd9o46CYHYr7VnPiX5RgWM87co274aV5+IlipTjup01W96Bz/WnK2g8F/HbwDoQxHgx29QPG4b77Rkncham75oKytvZYRVrDj1o5X3oU2nu2AaM44Gu91wMUBQxPw/To8/7hwe9idEdYiW84POXbPznfb4tOs0NDsZfpjjDRAsOZtPD/k9Dfca3e69sPPcA5Twhnbzw95ZkGOmB4m8iK0SrAOMb3DvP5cT2XFgGh2V3y1frGWfpTTn6w/M8uZEm6y8PyQ/a9aPSVccriq+enh2NkOPk/55YLMjRKsTZacH/KgB53tUds76uxJC9p++rdlSpiq+akf9AnWjMpHTN5H9Jdvz/PLxH1+uh3OZEo4b+P5Iae/4faZy8JFWMtPqjX4tRs38iDLF77RBj/Xdkzspct34LHVsDUT0FKwErf5fKaE7Vj361529AR9gnV3P00kqxk7hGdlU6sEa95tf/WL16oxXuolHHRnSuf8B3YqrmWEtV7QyGvd4ozXl4chAAUYYd3F7MdZQpV9B5MjCNaNzH6cZew+E7eXJWRNMiUEYgjWSnF/vYOOuMEYwSKeVVf34RgWzUo8jLXl6gGDH1Bt7LxggjUj7jeeXBuvHtAP3BFXD7iWKSGUsP3qAQtvjCZYt9DGX9cJEx8qPH9jxhxxoK3/PQfnj838ApgSwhk2JmPh1QMGH6Wlc/wL1hpt/Ow5xy7XuF1ytsU7/FqaEtKImvvqOyInbNv0+4bNECw4yuyQp7OIoezVA+owJYT9rTjL/mPz1QP6X9ve9Rnbn/Ru1MZxgTaeRYolr7afyDqmhDP8VvETtTqUYMGe1OpQjmG1zx5SxC7rG25OsGjKwoPW50fcn41dCBbtWPjh4fOXC6jVXhzDohELPzxsbBVNsGjfhSsw1WpfgtU+O8xV54RSq905hjVqy7kf+7d3/sj7PT7f+fnwU96dYA3beO7H/o0T8eI0LmSdzpRwwPZzP5Z6Q/0mZj887ELWDRCs3Rg3na+fpOeXwU8Fj30fP74IpoQ/W3jux7Gv9Yd9LxNn15y9cXAE3diJDZokWD9bcu7HM90wggd9xqU/IrvbC1ufYAW752fTTgi0cpUlWAP6F+DcZSfZcU9bnqrGFmecPJzslGticP3razuRwhsOmZcTrGEbz/14tIUPtPvijOUPfYQL9+TZrGy/6Onnf7dtaeMEa9Svh29nV0Jc/hG2XxdnPCrtP6XGHUdc9PTzbeu85gVZ1sBfFq4AOHOn6qxRyNV/0dp4XmcywmrK8h1g3Yxv9+N6sxJ36YWv7RHHSZsnWDuo8Kv26zuG1RZnDKrwwq6w5LVVq3UEawef4w6X/M6d+dBn7lcN78PT7xuO3YGHY1h7eX9C7eRxyufgztj7A9OfrSuu8h67+2vrc44LGWHt6eSh1uyj7L4449XchTlX2/ja/vq1vBl5HuKea9DhaKaEh7hkhgjNE6wDfc52cvWGQCME61iGWrAjwTqDg1mwC+8Sco0VpzqYfoPS25d3IFjX2PesL4
+03XXFOu+JMxzM/ivNEKwL7H7Wl6zddd2pDpZ/yJGGOYZ1tu1nJrn5rju2/PLR0HkdGGOElWHhpOmRH6zpd1Q7r8Ng+jWrYYJVy+rzfH720vTddeJUB+lPje0Eq5bVZ31pfk+erpWW3YRjWJwt/TQSXMgI62yvYy7Jk2XFqQ4eQ2tBPi+d00jcxO12lSJ+XYc12zi7K3cgWEAMx7CAGILFlRxu5yeCBcQQLCCGYHElZzfkJ4IFxBAsIIZgATEEi4s5jMVyggXEECyu59NhLCRYQAzBooDX6+EwFgsIFhBDsIAYggXEECxqcBiLBQQLiCFYQAzBogyXF2COYAExBAuIIVhADMECYggWEEOwgBj/XL0B8Lfv9e5jCx0+97ES4mYEi0qez78a1Pnf/o2Dd6BdpoSU0a/P4AcMFerGBIsQ/XIZXt2PYBGi0ya1uiXBIpBa3ZVgkUatbkywKKN/iF2b+JtlDVyqk6ROs8aWOCxZq0WLni8/b65iAMWPjLC4wnuIpFb8SLA4nYEVaznozrnUig0EixOpFdsIFid5qhWbCRZneD69H80OBIvDqRV7ESyOpVbsSLA4kFqxL8HiQGrFvgQLiCFYQAzBAmIIFhBDsIAYggXEECz+9exdR+v7lucf/fuM/dPEd4Z1nA+Led/rP8f+u/+/D6lib0ZYzBtc/9nPU/9ur9fL2lF2JFgs9Z73LQmQgRUHMSXkP9OheadqSbOMqjiIYPEfR6AozpSQecpFEYLFSq/XqxMyXeNopoTM+27T97Sx06yx5Q6wF79YQAxTQiCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcQQLCCGYAExBAuIIVhADMECYggWEEOwgBiCBcT4P9AFEN956BohAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGB size=400x400 at 0x7F03C06F4E80>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create the molecule from a molfile\n",
"# example from: Cassidy, S.J. Electron-deficient heterofluorene conjugated polymers and small molecules. \n",
"# Ph.D. Thesis, The University of Alabama, 2018.\n",
"\n",
"mol = Chem.MolFromMolBlock(\"\"\"\n",
" RDKit 2D\n",
"\n",
" 24 27 0 0 0 0 0 0 0 0999 V2000\n",
" 1.0344 -8.1310 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.1491 -7.1273 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.8373 -5.6601 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.9520 -4.6564 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.6401 -3.1891 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.2135 -2.7256 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.2135 -2.7256 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.0000 -3.6073 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.1568 -5.0991 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.5271 -5.7092 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -2.7406 -4.8275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -2.5838 -3.3357 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 3.7548 -2.1855 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 3.4430 -0.7182 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 4.5577 0.2855 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 5.1814 -2.6490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 5.4933 -4.1162 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 4.3786 -5.1199 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1 2 1 0\n",
" 2 3 1 0\n",
" 3 4 1 0\n",
" 4 5 2 0\n",
" 5 6 1 0\n",
" 6 7 1 0\n",
" 7 8 2 0\n",
" 8 9 1 0\n",
" 9 10 2 0\n",
" 10 11 1 0\n",
" 11 12 2 0\n",
" 12 13 1 0\n",
" 13 14 2 0\n",
" 14 15 1 0\n",
" 15 16 2 0\n",
" 16 17 1 0\n",
" 17 18 2 0\n",
" 5 19 1 0\n",
" 19 20 1 0\n",
" 20 21 1 0\n",
" 19 22 2 0\n",
" 22 23 1 0\n",
" 23 24 2 0\n",
" 24 4 1 0\n",
" 14 6 1 0\n",
" 12 7 1 0\n",
" 18 13 1 0\n",
"M END\"\"\")\n",
"molblock_with_atom_index(mol)\n",
"Chem.Draw.MolToImage(mol, size=(400, 400))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We want a dative bond between the oxygen (atom #2) and the boron (atom #6).\n",
"# These 1-based numbers are each atom's line position in the molfile atom block (e.g., boron is on line 6 of the atom block).\n",
"\n",
"# Now format the annotation as an SD data item: a tag header line, the value line, then a blank line:\n",
"\"\"\"\n",
"> <PUBCHEM_NONSTANDARDBOND>\n",
"2 6 5\n",
"\n",
"\"\"\"\n",
"\n",
"# This says: connect atoms 2 and 6 with PubChem nonstandard bond type 5 (dative).\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
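"# Final edited file: the original mol block, followed by the PUBCHEM_NONSTANDARDBOND data item and the $$$$ record terminator:\n",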
"\n",
"\"\"\"\n",
" RDKit 2D\n",
"\n",
" 24 27 0 0 0 0 0 0 0 0999 V2000\n",
" 1.0344 -8.1310 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.1491 -7.1273 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.8373 -5.6601 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.9520 -4.6564 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 2.6401 -3.1891 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.2135 -2.7256 0.0000 B 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.2135 -2.7256 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.0000 -3.6073 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -0.1568 -5.0991 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -1.5271 -5.7092 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -2.7406 -4.8275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" -2.5838 -3.3357 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 3.7548 -2.1855 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 3.4430 -0.7182 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 4.5577 0.2855 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 5.1814 -2.6490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 5.4933 -4.1162 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 4.3786 -5.1199 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 1 2 1 0\n",
" 2 3 1 0\n",
" 3 4 1 0\n",
" 4 5 2 0\n",
" 5 6 1 0\n",
" 6 7 1 0\n",
" 7 8 2 0\n",
" 8 9 1 0\n",
" 9 10 2 0\n",
" 10 11 1 0\n",
" 11 12 2 0\n",
" 12 13 1 0\n",
" 13 14 2 0\n",
" 14 15 1 0\n",
" 15 16 2 0\n",
" 16 17 1 0\n",
" 17 18 2 0\n",
" 5 19 1 0\n",
" 19 20 1 0\n",
" 20 21 1 0\n",
" 19 22 2 0\n",
" 22 23 1 0\n",
" 23 24 2 0\n",
" 24 4 1 0\n",
" 14 6 1 0\n",
" 12 7 1 0\n",
" 18 13 1 0\n",
"M END\n",
"\n",
"> <PUBCHEM_NONSTANDARDBOND>\n",
"2 6 5\n",
"\n",
"$$$$\n",
"\"\"\"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:my-rdkit_2019_09_2-env] *",
"language": "python",
"name": "conda-env-my-rdkit_2019_09_2-env-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment