Skip to content

Instantly share code, notes, and snippets.

@sshojiro
Created June 9, 2019 18:19
Show Gist options
  • Save sshojiro/6c4bcccdb2b73e12bbaeff4a8cd2e27b to your computer and use it in GitHub Desktop.
Save sshojiro/6c4bcccdb2b73e12bbaeff4a8cd2e27b to your computer and use it in GitHub Desktop.
RDKit - join two substructures.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Structure Evolution\n",
"\n",
"Join two sub-structures."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import rdkit\n",
"from rdkit import Chem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"from rdkit.Chem import Draw \n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2018.03.4'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdkit.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def to_mol(rdmol):\n",
"    \"\"\"Rebuild a molecule by round-tripping it through SMILES.\n",
"\n",
"    A plain ``Chem.Mol`` copy of ``rdmol`` is serialised with\n",
"    ``Chem.MolToSmiles`` and the resulting string is parsed again with\n",
"    ``Chem.MolFromSmiles``; the freshly parsed molecule is returned.\n",
"    \"\"\"\n",
"    smiles = Chem.MolToSmiles(Chem.Mol(rdmol))\n",
"    return Chem.MolFromSmiles(smiles)\n",
"\n",
"def add_fragment(origin,ix_o,fragment,ix_f):\n",
"    \"\"\"Attach ``fragment`` to ``origin`` through a new single bond.\n",
"\n",
"    Args:\n",
"        origin: RDKit molecule that receives the fragment (not modified).\n",
"        ix_o: Index of the atom in ``origin`` that forms the new bond.\n",
"        fragment: RDKit molecule to attach (not modified).\n",
"        ix_f: Index of the atom in ``fragment`` that forms the new bond.\n",
"\n",
"    Returns:\n",
"        A new molecule rebuilt by ``to_mol`` from the SMILES of the joined\n",
"        structure, so its atom indices may differ from those of the inputs.\n",
"\n",
"    Raises:\n",
"        IndexError: If ``ix_o`` or ``ix_f`` is not a valid atom index.\n",
"        Exception: Whatever ``Chem.SanitizeMol`` raises when the joined\n",
"            structure is not chemically valid (e.g. an exceeded valence).\n",
"    \"\"\"\n",
"    ix_o = int(ix_o)\n",
"    ix_f = int(ix_f)\n",
"    n_origin = origin.GetNumAtoms()\n",
"    n_fragment = fragment.GetNumAtoms()\n",
"    if not 0 <= ix_o < n_origin:\n",
"        raise IndexError('ix_o=%r is out of range for origin with %r atoms'\n",
"                         % (ix_o, n_origin))\n",
"    if not 0 <= ix_f < n_fragment:\n",
"        raise IndexError('ix_f=%r is out of range for fragment with %r atoms'\n",
"                         % (ix_f, n_fragment))\n",
"    # CombineMols copies the atoms and bonds of both molecules as they are\n",
"    # (charges, isotopes, aromaticity, ...) instead of rebuilding atoms from\n",
"    # their atomic number only; the fragment atoms are placed after the\n",
"    # origin atoms, hence the n_origin offset below.\n",
"    editable = Chem.RWMol(Chem.CombineMols(origin, fragment))\n",
"    editable.AddBond(ix_o, n_origin + ix_f, Chem.BondType.SINGLE)\n",
"    # Recompute valences / implicit hydrogens now that two atoms gained a bond.\n",
"    Chem.SanitizeMol(editable)\n",
"    return to_mol(editable)\n",
"\n",
"from rdkit.Chem.Draw.MolDrawing import DrawingOptions\n",
"def molview(mole):\n",
"    \"\"\"Draw ``mole`` with atom indices shown, to look up indices by eye.\n",
"\n",
"    Args:\n",
"        mole: RDKit molecule to draw.\n",
"\n",
"    Returns:\n",
"        The image produced by ``Chem.Draw.MolToImage``.\n",
"\n",
"    ``DrawingOptions.includeAtomNumbers`` is a global flag: it is switched\n",
"    on only for the duration of the drawing call and then put back to the\n",
"    value it had before, even if drawing raises.\n",
"    NOTE(review): this flag is read by the legacy drawing code; confirm it\n",
"    is still honoured by ``MolToImage`` in the RDKit version in use.\n",
"    \"\"\"\n",
"    previous = DrawingOptions.includeAtomNumbers\n",
"    DrawingOptions.includeAtomNumbers = True\n",
"    try:\n",
"        return Chem.Draw.MolToImage(mole)\n",
"    finally:\n",
"        # Restore the caller's setting so the flag never leaks into later cells.\n",
"        DrawingOptions.includeAtomNumbers = previous"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAIAAAD2HxkiAAAE2klEQVR4nO3dwXLiRhRAUUjl/39ZWZBM8NSMA5LTt7t9zsreIFFweY0E4n4cxw3o/FHvAHx3IoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKIiRBiIoSYCCEmQoiJEGIihJgIISZCiIkQYiKEmAghJkKI/VnvwNru9/vzv8dxDN7usC3y/xHhVeMzuN/vPzb6/DeLshxdmwI3YBJeVa0MH9sV4QYsZr7MsJXhc36WoxuwHF2S8HYiwkt+OjoKJ3hPOMj1XE2/XXlHcVVyYMZ5wp2IEGLeE0JMhBATIcRECDERQsx5wqHOnS10BHtvTlFAzHL0vDk/szbnXvEJEZ407dcXjuPQ4VpECDERnjHtGHwwDNciQoiJ8G2Tj8EHw3AhItyWDlchwvcsMQZZiwh3ZhguQYRvWHEM6nB+IoSYCF+14hh8MAwnJ0KIifAl647BB8NwZms/t2ADJiHERPjBd1izDb6P93+M3OhaXN7iX9s/UcbfQb9n+gqT8G/f4SlyHMf293FFIoSYCG+37zEGmZYIFUjMgZnb7eMRC00ymAg/XFpXgYwnQn7h4smMHy9kzx+X8+r2O174IebADMRECDERQkyEEBMhxER43rTfuph2x/glEZ435zUjfN5gOSKEmAhf8ruJN9sw/GQMTrWfPBPhS2aL7V3WqDMT4VXz9Km0RYnwVZ/ENkOHny9ExTkzEUJMhG+Ydhgag0sT4c4UuAQRvmfCYai01YnwKyUxWIiuToRvm+FY6H9S4EJECDERnjH5MDQG1yJCiInwpGmHoTG4HA8YxFz896rxF7d1Od3NiPCS8T+C6Wc39+M94SXjG1DdfkzCL/BYHw7Owxjchkn4BR4/Qz3yYKkCdyLC9ShwMyK8ZPypQgXuR4QQ87J61Ytn7a7PzMft/3Q7Hr4NiBBilqMQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECDERQkyEEBMhxEQIMRFCTIQQEyHERAgxEUJMhBATIcRECLG/AP9sMWSGlH7pAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGB size=300x300 at 0x1778E583E80>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toluene = Chem.MolFromSmiles('Cc1ccccc1')\n",
"molview(toluene)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## para"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAEpUlEQVR4nO3da3LiSBCFUejw/rdM/yCiZ3qGl7hSVWbpnAUYkXZ9pDAOX2+32wWAb/2afQEAvckoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAkZ/ZF8Dler2Of9Db7Tb+QQczWMaQ0cmu1+uUgzfrcYcxWIZxUw8QkdGZJm4ut9ttyj3vGAbLSDIKEJHRaaa/ibbq3mSwDCajABEZnePZxnTcFvPwK6+3N01fRe/WGywv+MDTBC8aelwC7ge7QmLGO7po55wqf8jo2a2U12dPZMqzW2mwvOamfrQpq+jdszvNNe5ACzZrjcHylowCRGR0qImr6N2qC2nBVfSu+2D5hIwCRGR0nOmr6N16C2nZVfSu72D5kIwOUvyoA1+T0cmm5HWlhbTF61PHwfI5GR2hyO38vznYgxn4wmSUv/Q67S1WUZYno4cruIredb+1nz7ArboMlq1k9FjtjjqwlYzOUSSvfRfSIgPcqv5g+YKMHqjpUQc2kdEJSuW140JaaoBbVR4s35HRo7Q+6sDnZHS0gnnttZAWHOBWNQfL12T0EGU/5PRMl5KWHeBW1QZLQkb3t8xRBz4ho+MUz2v9hbT4ALeqM1hCMrqzxY468JaMDtIir5UX0hYD3KrCYMnJ6J6WPOrAazI6QqO81lxIGw1wKwvpAmR0Nwsf9Zo61qfjNfOWk7+nhyXtmNcXn3sdfzGXy+XZAHvN9sXVThlso9EV9zP7AtbX8Ye12t8O9MrlVms/uzNwU78n73MN1mjgWrkwGeUjcyvQKJdbyesCZHRnCx/4mloMXCvXJqO8V6ECLXL50OvfLE0fLDkZ3V/fA9+UgTOXjPJGnY2pYy6tomcgo4foeOAfqnbUa/6R1ReqDZaEjB6l3cHuruDAtfIkZJSnalagYC4fcjt/HjJ6oC4HfhkGzhQyymOVN6b6ubSKnoqMHqv+gV+MgTOejPJA/Y2pci6tomcjo4erfOAf6nLU2334qctg2UpGRyh7sNmdVp6QjPKXXhWotpC6nT8nGR3EQjqYgTOMjPKPjhtTnVxaRU9LRsepc+Af6nvUq93a/0ffwfIhGYUdaOWZyehQRfaj/+tegbkLqdv5k5NRgIiMjlZwIV1jY5q1kFpF8X/qJ7gf7Gf/C/7Qxz3ui8NpebWco86eUudKdvHi9emIp2kV5SKjE1U4ZhWuYXcVnlSFa2AY740CRGR0mum/a1p1YzJYBpNRgIiMzjRxb1p7YzJYRpJRgIhXzvmm7E1n+L4bLGPIKEDETT1AREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWI/AZjRK02rekHRAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1778e6bb7b0>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"benzene = Chem.MolFromSmiles('c1ccccc1')\n",
"add_fragment(toluene,4,benzene, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## meta"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAFf0lEQVR4nO3d0XLaSBCGUbO17//K7EVil9e2MNBI03/rnKtU4sQqcH8axERcrtfrGwDP+mf1AQBkk1GAEhkFKJFRgBIZBSiRUYASGQUokVGAEhkFKJFRgBIZBSiRUYASGQUokVGAEhkFKJFRgBIZBSiRUYASGQUokVGAEhkFKJFRgBIZBSiRUYASGQUo+Xf1AbCLy+Xy8evr9brwSGA8GR3ocrl8TufnpH4mr/ASMjrfVi5/zKu2wqMuxmaYL0vRJ/76j7/v5wS2yOgoxYbe/pd//H0/PyCjc+zX0Nvf9OPXfpY4J9dGKflI59ZyFcazb3SIJUvRz67Xq5JyTlajE9xoqBfdsDfXRuPdvw494G2i5YtiOJ7V6InYQAp7sHbI1nCHkwUpZ2M1GmzXYLnYCneS0VSrFn3SCV/Y8MSL2fnE2ViNRnL9cQkXNPiRacwT0dCIg3yI2w+yxWo0zLw8hXpo99iNr2cAGYXf3X/2sjn3hCxtkmQtRbOO9oaGm3NpxWo0xpgqZbE5l1/Z8JQhsaF2Pj3t+m71gXAXGYVNy89eTkURvKgPsHyYn/anAqEH7/aD3ElGu8vNULTbD/uRG0ijT0UnIaOtmZ/+7HBCRtlX4mLqJQf8479gh9NIMtpXXH1maLjDKfFUdCoy2tSksQmqgNsP8gQbnjpKiQ6HsfOpMxmFv5y9eI6MtrM1zNGLkf6Lqf4N7f8YnpaMQkBD6UxG29ladKQvRtKPH7bIKGcXtBR1KupJRjuyID1MUENpS0Y5r8SGNjwVIaNNmRZIIaNh0vPa5/gTl6L0JKPsqEkxv4tuaJ9TEX/IaF/pbzRtpWr58Uc3lIZklF38mqrLu8MOaZLlpyI+c4en1rbujRR0z6Tvvhz5wbfgzH3caEtGeb3bn2L05Y+OvHv8pIZGn0qHkdHu4qblVUf78rvHZz2MBJHRVD3zevuQ6gf80NL1xtfDC8koB2n44Rx7H9Xeep5KT8g79QFSdj49dEn0GNd3x39rzkNGeY3my6KUU9Gj0o9/BhnN0Hxa9r4kCp3JaLbmeX3r1NCpC1KWk9EYbae94SXRU2n7g3EeMhpv7RQJJcgoz4u7JDr1dX368aeT0SRBFWjY0Bm6PdG8yShPC70kGnQq+u7GAxtx/FPJaJgm0xLaUNiDjA5xZF6nhrLJKWpL3JXo85DRPJ2nPWKYOz+AWzS0Mxmd45g6eDl/PA9sczLKA8Y0NPqNpi+yHvmRZDTSjQrs902N6xJezvcno7zApGFutSDV0AgymurgaR/zcv5Dq1z+SENTyCi/m9dQeCEZDXbYempqKDu/0WQpGsRnMcX7GPjj58ow70RDs8hosC/jdPCnY84e5oWfFqehcWQ01fdxOvLDh8cMc9aHawYd6qnI6HwP5dWUflhSWKFMJKORXjJsW5/b/usXDxv1PgtSL+dDyWieXcfpnqWrYd6DhuaS0TCrxumcM9xkodrhGLjBvlFYv1HUf3CIJqNJTNTxDiispzWdjMYwbLtatSB1SXQAGc1gnBZa9ch70lPIaADjNJVLojPIKPzl3oM8R0a7M1FTeVrHkNHWNPRgy3c+vXnSA9l+35dxWsW9B3mIjML/uPcgj/K0NWWiOtvj5lie8VxWox2ZqOaevjkWIxnXdjR0mBPee/BsrEZhX+49OJ5zYC9WJRDHvtFGNBQSyWgXGgqhZLQFDYVcMgpQIqPrWYpCNBldTEMhnYyupKEwgIwClMjoMpaiMIOMrqGhMIaMrqGhMIaMApTIKECJjAKUyChAiYwClMgoQImMApTIKECJjAKUyChAiYwClMgoQImMApTIKECJjAKUyChAiYwClMgoQImMApTIKECJjAKUyChAiYwClMgoQImMApT8By1K8+t5JEsUAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1778e6bbf80>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_fragment(toluene,3,benzene,0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ortho"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAFJElEQVR4nO3d3W4bRwyAUavI+7/y9kKAoVjWRruc1ZCcc+7bWGz5ifoJfNu27QuAs/6b/QMA1CajACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQ8mf2D0DI7XY79w9u2zb2J4FlyWhht9vtdA1P9/dLguFvMrqoSAoj+YZ+vDdalZZBEq5RDtu2rX3EvenB+2S0pPYVmys4Xp/7rUZG68nQ0BUO0tMin/sZaUXeG+0j8jqUb1rGUTJazKslv3T5fw30/SC96E+cZW5DW450BTJayawlt96wQ0Y7kNchMrycbzbSRfiIqYwMrzenV+bzjkZtwREho+XJ6xCvHsXRhxbPbpuRrkNGa5jyydIPjdd74ONqOR/2eW+0gOTx8nbecEZai4wW9vm8tlzv5M9S5Cej2WV4Of9PdfOaaoyP6o50QTKaWsIlt97wg4yWJK9DJBzjo4ojXZOM5pV2yV+tt7VnTTJaT9q81lJijJ6ZSpDRpJJ/slT9IE0yRnqQ0YwsOd+qPDOtTEYrSZXXugdpqjG+I/9IFyej6ZRb8lqMl+FkNJedJU+4/3UP0nKMNDMZrSFhQysyRq4go4lUXHIH6ccYaVoyWkDFvCZkjFxERrNI/kXRHSUO0vxjfEeqkfJNRlPoseSwJhlNrUpekx+kVcb4jiQj5ZGMztdpyRMyXq4mo/O9ui9q7X/yg7QTI81GRvMq1NC0aj0VUZT/ybLosfA73zc4/e+MjKXHVH/V+KGV4xcsM9IV39kanmABYiz/PyXSdb0nPq6638Z9R49H0YD3RrmWVR/CZ0qZyWgi/T6Bnd7QHt8feDXGWo+iMRmFqqY/S3Eno7l0ui+SLHn1gzTJGNkho1CSvOYho+lUuZL2pVryugdpqjHyiowynuUfotZvlFmZjGaU/0oqp+5B+kxDs5FRBrPkQxhjITKaVMUr6Sv38hc6SL2cr0VGoQwNzUlG80p4Je3Lv+QlDtL8Y+QHGU0t1Xozl7ymJaOMUWXJkx+kVcbIIxnNLsl677P8Q/hkqSgZZTnJD9JnGpqcjBaQdr3vKi55wpFWHCN3MkpIs+WXV06Q0RoSrnd1qUba+5edtCejnNdyyT+f15ZjXIqMlpHqeuoh+UjltQq/YLmDoy0YspyNl/ye1888usZjXIf/hMUM2bp4djst/6+P5TMP0BdFe3CNrsh+/tMnD9JnGlqL90aLyfB2XrMlnzXSZmNcmYxyzDrLL6+8SUbryXCQNrPz10Mv+hN9UbQTGeUASz6KMXbiI6aSggepHX429zOlu+k/AOfIaFWRfTudYEsOzzz7cUD7c8mXnDjBe6Mwn4aW5kU9B2R4A/FSkTedG4+FfTIKfzldQx/6LavzZcFFeh+kcJT3RgFCZJTD/DUqeCSjACEyyhkOUvgmowAhMspJDlK4k1GAEBnlPAcpfMkoQJCMEuIgBRkFCJFRohykLE5GGUBJWZmMAoTIKGM4SFmWjAKEyCjDOEhZk4wChMgoIzlIWZCMAoTIKIM5SFmNjAKEyCjjOUhZiowChMgol3CQsg4ZBQiRUa7iIGURMgoQIqNcyEHKCmSUa23bNvtHgGvJKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQMj/yMgxI6ZSQIYAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1778e6bb5d0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_fragment(toluene,2,benzene,0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment