Skip to content

Instantly share code, notes, and snippets.

@jose-manuel
Last active March 13, 2020 08:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jose-manuel/a11ecda5b6989c2a5416d252bbf4c3db to your computer and use it in GitHub Desktop.
Save jose-manuel/a11ecda5b6989c2a5416d252bbf4c3db to your computer and use it in GitHub Desktop.
Potential problem with a canonical tautomer
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Problem with the Canonical Tautomer"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-03-13 09:47:19,492 | INFO : test\n"
]
}
],
"source": [
"import logging\n",
"import sys\n",
"# activate logging messages\n",
"logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s', level=logging.INFO, stream=sys.stdout)\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"logger.info('test')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"RDKit WARNING: [09:47:19] Enabling RDKit 2019.09.3 jupyter extensions\n"
]
}
],
"source": [
"from itertools import tee\n",
"import six\n",
"from rdkit import Chem\n",
"from rdkit.Chem import BondDir\n",
"from rdkit.Chem import BondStereo\n",
"from rdkit.Chem import BondType\n",
"from rdkit.Chem import Draw\n",
"from rdkit.Chem import rdChemReactions\n",
"from rdkit.Chem.MolStandardize.tautomer import TautomerEnumerator\n",
"from rdkit.Chem.MolStandardize.tautomer import TautomerCanonicalizer\n",
"from rdkit.Chem.MolStandardize.tautomer import TautomerScore\n",
"from IPython.display import Image\n",
"from IPython.display import HTML\n",
"from IPython.display import SVG\n",
"from IPython.display import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Problem"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f130446b530>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mol = Chem.MolFromSmiles('O=C1C=NC=C[N+]1=O')\n",
"mol"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-03-13 09:47:19,744 | DEBUG : Applied rule: oxim/nitroso r to O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,746 | DEBUG : New tautomer produced: O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,752 | DEBUG : Applied rule: oxim/nitroso f to O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,754 | DEBUG : Previous tautomer produced again: O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,760 | DEBUG : Tautomer: O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,767 | DEBUG : Score +2 (C=O)\n",
"2020-03-13 09:47:19,769 | DEBUG : Score +2 (N=O)\n",
"2020-03-13 09:47:19,771 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,774 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,777 | DEBUG : New highest tautomer: O=C1C=NC=C[N+]1=O (6)\n",
"2020-03-13 09:47:19,778 | DEBUG : Tautomer: O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,780 | DEBUG : Score +4 (oxim)\n",
"2020-03-13 09:47:19,782 | DEBUG : Score +2 (C=O)\n",
"2020-03-13 09:47:19,783 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,784 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,784 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,786 | DEBUG : New highest tautomer: O=C1C=NC=C=[N+]1O (9)\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f130447b9e0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"logger.setLevel(logging.DEBUG)\n",
"tc = TautomerCanonicalizer()\n",
"canon = tc.canonicalize(mol)\n",
"canon"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This means I have the transformation below:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-03-13 09:47:19,825 | DEBUG : STREAM b'IHDR' 16 13\n",
"2020-03-13 09:47:19,826 | DEBUG : STREAM b'bKGD' 41 6\n",
"2020-03-13 09:47:19,826 | DEBUG : b'bKGD' 41 6 (unknown)\n",
"2020-03-13 09:47:19,827 | DEBUG : STREAM b'IDAT' 59 8192\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4QAAAEsCAIAAAAU/OrGAAA14ElEQVR4nO3deViW153/8S8IKooL7nFX3MENQdz31A2bX8aYX66r8ySZtMWrTQe80kkxk/6CnWk6ZCbpQJpOB7NMSZtJLzPJ1XlcU3CNSVVEk7iAuEVcEjfUKCaC8P39cT95QDTKct/PeZb365o/pvBwn6Px5nzu+5zzPWGqKgAAAIAJ4aY7AAAAgNBFGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUQAAABhDGAUAAIAxhFEAAAAYQxgFAACAMYRRAAAAGEMYBQAAgDGEUYSSc+fk3//9Dl9Xlddek6QkadtWOnSQ2bPl/fd93jkAABrvnkPY8uUSHV3/px56SMaM8VUX74EwitBQVSU5OTJkiDz1lKxeXf+7TzwhqakycqTk5cl//IdERsq8efLKKyY6CgBAYwT+EBZhugOA89aulaeektJSEZEFC2To0Fu++6c/ye9/L9nZkp7u+cr3viePPipPPSWzZsmIEb7uLQAADRQUQxhvRhHUSkslJUVSUqS0VIYMkdWrZe1aGTLkls/k5Ei/fvLkk7d88fnnRUR++1vfdRUAgMYKiiGMN6OOqKmp+f3vf3/mzJmSkpJly5YlJiaa7lHouXxZsrLk3/9dKiulY0dZvlyWLZNWrep/7KuvpLBQli6ViFvvhT59ZNw42bbNZ/0FAL+1efPmt956q1OnTsnJyYsXLzbdHXyjUUPY5cu3/M+qKqd713CEUft9+OGH6enpRUVFLVq0qK6ufvvttx9//PHnn3++R48eprsWGmpq5I9/lKeflnPnJDxcXC558UXp1u3OHz57VqqrpW/fO3yrXz+2MQEIccePH3/66affffddEQkLC1PVGTNmZGdnjx492nTX0JghrKJCYmLqf8Zv/iMyTW+n06dPP/roo1OnTi0qKurVq9fvfve75557rmXLlm+88cagQYNWrFjx9ddfm+5jsNuyRRIS5LHH5Nw5mTFD9uyRN9/81iTqFRbWuK8DQLC7fv36ihUrRowY8e6777Zp0+a555579dVXu3XrtmXLloSEhEcfffTcuXOm+wgRadgQ1rq1bN58y/9Nm+ab3jWIwg7Xr1/PysqKjo4WkaioqIyMjC+//NL61uHDh5csWWL9bQ8aNGjVqlVmuxq0ysrU5VIRFdE+fTQvT2tq7v1T169rixb64x/f4VsTJmh8vO3dBAA/V1NTs2rVqr59+4pIWFjYkiVLTpw4YX3r0qVLGRkZLVu2FJGOHTtmZWV9/fXXZnsb0ho4hGVkaNu29T+weLGOHu1o7xqOMGoDt9vdv39/K26mpKQcP3789s9s3Lhx5MiR1mdmzZr1ySef+LybwauiQjMztXVrFdE2bTQzU7/6qhE/PmGC9uunVVW3fLGsTCMj73yHA0DwKiwsnDRpkjVaJSYmbt++/fbPHDp0KCUlxfrMkCFDVq9e7ft+wqMhQxhhNLgVFRVNnTrVuiHHjh27devWu3y4qqoqNze3a9euIhIREZGamnru3DmfdTU41dToql
Xat6+KaFiYLlmi3zy+N8Lbb6uIZmff8kWXSyMj9cABu3oKAH7u9OnTqamp4eHhItKzZ8/c3Nzq6uq7fD4/Pz8uLs4aAefMmbN//36fdRW1GjKEEUaD1YULF9LS0lq0aCEinTt3zs7OvnnzZkN+sLy8PC0tLSIiQkRiYmKys7Or6j3QoIEKC3XyZM+8fGKi3unxvaEef1zDwvSxx/RPf9K8PL3/fhXR3/zGvr4CgP+6ceNGdnZ2u3btRKRly5ZpaWlXrlxpyA9WVlZmZ2d36NBBRCIjI9PS0i5fvux0b+Hh/au+5xBGGA0+ttx7xcXF8+fPtx4ohw4dunbtWie6GrROn9bUVA0PVxG97z7NzdW7Pr7fW02NvvqqjhunUVHavr3OmqUbNtjUVwDwa263e+DAgd6VZkePHm3sFZr8dgZNdP68pqVpjx6ePHrPIYwwGmTy8/NHfHOewZw5cw40bxrX7XbHxsZ6r3bw4EG7+hm0btzQ7Gxt315FtGVLTUvThj2+AwDqOXjw4Ny5c60xaPjw4evXr2/O1fbs2TPtmw3aY8aMufu6NTTR119rVpa2a6ci2qqVrlljukP2IIw2VElJyYIFC7zrtdfY9C/Aes/avn1773vWBk6OhCK3WwcO9MzLp6Ro4x/fAQCqevHiRe+7zE6dOtn4LtPtdg8YMMD7nvXYsWO2XBaqqm63xsZ6BsE5czSIXmARRu+tvLzcW8kiJiYmKyvrxo0b9jZx5syZ1NRU6/dCly5dmOOo7+BBnTvXcwcOH67Ne3y/s5s3mzvXDwB+r7KyMjc3t0uXLt6ttOfPn7e3CavWobUCtV6tQzRRSYnOn+8ZBIcO1aBb2kcYvZvq6uq8vLxu3bqJSHh4uMvlOnv2rHPNFRUVTZkyxXqgTEhI+OCDD5xrK2BcvKhpadqihYpop06ana1OxPTNm3X0aH3tNfuvDAB+Iz8/Pz4+3hplZs+evW/fPufaOn36tMvlCgsLs/bm5+Xl1TSk9jPqKS/XtDSNiFARjYnRrCy1+3WYPyCMfqtNmzaNGjXKumlnzpzps8qgbre7X79+3jmOzz77zDft+pvKSv3s1b9ox44qopGRmp6u5eX2N3P8uD70kOdxMznZ/usDgB8oLS31Hr8yePBgnx2/smvXrokTJ1rtJiUlffTRR75pNxhUVWlurnbtqiIaEaGpqRq85SAJo3dQVlbmcrmsm6dPnz55eXk+7kBFRYX3PKc2bdpkZGRcvXrVx30wKz9f4+M1vusXNe076OzZ6sTj++2l8q9ft78VADDq6tWrmZmZrVq1EpHo6OjMzEwfn5lUU1OTl5fXo0cP73lOZWVlvuxAQNq4UUeO9LwomTVLg/2gHMLoLa5du5aZmdm6dWsRadu2bWZm5leNOsvHVidPnvTOcfTu3TtE5jhKSnThQs8NOGSIHnr/uP1tWKXy+/WrLZUfqq+fAQQxa6VZ9+7dvSvNvvjiC1Od8avh1a8dPqxLlnhGwUGDNDSOECeMetzlKF6zduzYkZycbL2mTU5O3rFjh+keOeXqVc3M1FatVESjozUzUx15et+9u7ZU/rhxzSqVDwD+yj/HjiNHjnhXCxiZePRr167Vzte1bdvoo60DGWFUVXXXrl11j+L98MMPTffoFn71dOuE6mrNy9Pu3VVEw8PV5VJH/nxnzthcKh8A/I//z6pt3LjRyJYM/1VTo3l52qOHZ77O5dLPPzfdJ58K9TDa2KN4DTK+7schO3ZocnLtDiJHnt4plQ8gBFRUVGRmZkZFRfn/fgPrJUvXrl29L1nOBe/unHvYtUsnTvSMgklJGpJ7vEI3jN5+FG9AFEKrW3v/gQeOBfThCydPqsulYWEqor17a16eOvL0Tql8AMHOWmkWcJVYLl68+JOf/CQiIkJE5s3739/8RquqTPfJl06dqh0Fe/VybBQMACEaRgP9iIh169Z997svWflq3jwtLjbdoUayNrJHRXk2smdkqCNP78XFOm+eJ4YOG+
ZIqXwAMG337t0BXaN6//79Dz74g3btakQ0Lk7/8hfTHfKB69c1K0ujo1VEo6I0I0MD4XWYc0IujO7du3f69Oneo3g3bNhgukdNVFmpubnapUttATK7D9FwRN2N7NabSkee3q1S+VaVYKtUfmg9bgMICXVP77vvvvtyc3OdPr3v+9///n//9387sQg1P1+HD6896vLAAdtb8Btut/bvXzsKBtrrMCeEUBh17ijeujZv3pyUlHTo0CHbr3xHvjmfyC67d+uUKZ4bMCFBHXl6t6oEB1xIB4DGqKyszM7Obt++vYhERkampaVdcWAp/OHDh5csWfL2229b/3Pjxo3Wq5zJkyfv3r3b9uYqK2uX90dGalqaXr5seyMmFRXplYee8IyCY8fqtm2me+QvQiKMWjdtx44drZvWiaN4VfXYsWOLFy+2btQf/OAHtl//Lnxwcnsz3b6R3ZHQXFCg8fGev4jZs/XTTx1oAwAMc7vdsbGx3pVmR44csb2JK1euPP300y1bthSRuLg461VovfL1Lpfrcwc2fZ8/X/uSpXNnf3/J0kAXLnj+UD9LyA+eP5V9gj+M5ufnx8XFWTftnDlz9u/fb3sT1gZGq5ZvmzZtTNXy9c+NOrc/6Tqykb20NASrBAMINcXFxfPnz7dGtGHDhq1bt872Ju6ZOH1Tvr6oSKdOrZ1JC9x3iDdu6L/9m3bo4Knm8tOfatXla6Y75XeCOYweOnQoJSXFexTv6tWrbW/i9lL5ZjcwWiWM2rXzlxJGbrfGxtbmYwee3r+pEmzVyreqBAdFxSsAqKu8vDwtLc3aeB4TE5OdnV3lwFL4nTt3TpgwwRo3x48f/9e//vXbPmnN4FufHDRokEOH3ddbXXn8uBONOCg/X0eMqF0I68DbsCARnGH00qVLGRkZVknOjh07ZmVlOVGSs7CwcPLkydatOG7cuO1+c5bP6dO1c+I9e5op7l5crPPn125kd+Dp/U618kOsSjCAUFBVVZWbm2uV5IyIiEhNTXWiJOepU6e8pfJ79erVwFL5GzduHDlypDUOzpo1y4ny9RUV9fed+2vt1FscOnTL0dYBXYfRB4ItjFp1dLt16+boYUXWBkarVL61gdEPS+UXFuqkSZ47ITHRd8de1t3IHhPj2Eb2nTt1wgTPH2/8eP32x3cACFz10t6nDiyFv379elZWVnR0tIhERUU1tlS+r7JywFTkvHRJMzK0ZUsV0Y4dNStLb9ww3Se/F1RhdMuWLaNHj7Zu2unTp+/du9f2JqxS+dYGRqtUvhMbGO1i1VHq29dzwNiSJXrihIPNWRvZu3at3cjuxIEaJ07o97/3VXX3+1RE+/bVP/3Jf38nAUBT+Woe3N2/f3/vXqjjTZ0I99UqAr9+C2FN13XrVjtdd/as6T4FiCAJo745itftdg8cONB70x71ky1C92JVmG/d2lNhPjNTndhetWlT7cqY++93pERcRYU+95ynVP5rM/+oK1ZoRYX9zQCAUdYOIWulmZM7hIqmTp3qLZW/zY4tQnX3Vw0dOnTt2rXNv2Y9VuDzw1PcN23SUaM8g+CMGfrxx6Y7FFACPozevpP9+vXrtrdy8ODBefPmeTcwrvfD4kn3UlamLpfnPunTR/PybL7+G284u5E90JexA8A9+aZ20oULF7xVtzt37mx71e26lafmzJlz8OBBGy9u8audq04Pr6EggMNo3aN4ndvJfnupfCemHnxm82YdPdqRR7fqan39dUdWxgRNgQ8AuIt6O9k/+ugj25uwqm536NDBWyr/sjNl5X1Vk99wTT/fTDyGgkANo7t373Z6J7u1KLtLly7eRdlOlMr3vQBa1BKUpY8BoJ6m7WRvrPz8/BEjRnhfWB5w/sDNuqeVdunSxaGDDwsKdORITySdNctHp534eEtG0Au8MOqbnewFBQXx8fHWTTt79ux9+/bZ3oRZt2/386vqnEF/KBwAaLN3sjdQSUnJggULHF3KeRe7du2aNGmS1XpiYuKOHa
dtb+L2c6Cd2D7rVa9YzYcfOthWiAikMOqbneylpaXeDYyDBw92aAOjn6hXCM2BYwGaIj9fhw+vrRLs/NM7ABhg1072uygvL8/IyLBO9YyJicnKyrphqM6Q2+3u169f374zW7Vyat2/DwoL+kMZ76AUMGHUBzvZr1696t3AGB0dnZmZ6USpfD+Un69xcX5xRERJiS5Y4OnJ0KHq26d3APCRujvZx44da8tO9nqsqttW+U+r6vZZ00uyrl279m//dsqqiNKmjf7TP6kD+421uFjnzbP/yBV/O+AwyARAGC0uLnZ6J7t103bv3t3RUvn+zJoWtw7PNTItXl5eu2wgJoYqwQCCk9M72S2bNm0aNWqUNW7OnDnTiYORmuzkydry9b17O1W+3t7DqN1uHTiw9moBUtcxkPh1GLV2sltFdJ3byb5jx47k5GTrpk1OTr7LUbxB7+xZ/cEPPBMQv/qVjxqtWyrf2lDl6FofADDCNzvZT5w44XK5rBGtT58+ef5aZ2jHDk1O9sS75GRHytd/9ZX+6leec0QXLmz6dcrKPDWkRo3STZvs6x/q8NMw6pud7L4plR9w9uzR//t/fVROfuPG2irBM2eqPz29A4BtfLCT3SqVb1Xddq5Uvo2s0i7du9e+iXBiSvL0aX388eYuP/uXf9Hf/Y5yLg7yxzBabye7E0fxVlRUeDcwtmnTxqENjKHj6ae1sTVe69aHi401UB8OAHyg3k72NWvW2N5EvVL5S5YsKSsrs70Vh1y9Wlu+PjracPl6SxNGNDSTf4XRujvZHT2K1yqVb+2FcqJUfqiZPl0bXvzKOjnDqhJsnZzh30/vANAUvtnJvmvXrokTJ1ojWlJSkhOl8n2gtLT29cTgwYZfTzRqRIMt/CWM1t3Jbs0vOLGTvaioaMqUKd6jeD/44APbmwgmGRkqogsW3PLFBx7Q6dPrf7KBt25NjZ+eKQwANrI2xXbr1s3RneynT5/2rjTr2bOnQ1W3famgQOPjPZF09mybE6HtIxpsZD6M+mYnu2/OgQgy1q0ronv21H6x7q07f77266f9+mmrVtqzp+f/Lyy889V27dKJEz0XTErSwHx6B4B78MFOdqtUfrt27byl8r/88kvbWzHi9vL1dm0YsXdEg73Mh9GPP/7YOk5p6tSpRUVFtl/fNyfkBqWMDO3WTePidPHi2i824Tny1KnaQh69ejlVyAMAjKuoqLBeiA4cOPC9995zogm32z1gwADvSrNjx4450YpZVvl66yzoTp3sKV9v14gGJ4SLad/5zndqampyc3O3bt2akJBg78VXr149fPjwZcuWffnllykpKcXFxTk5OVYwRUOEhckzz8h778nBg028wpEjMmSI/OEP0rq1/L//J4cOyaOPSliYrb0EAP/wve9979y5c3/7t3978ODBBx980N6L7927d/r06d/97nePHz8+duzYrVu3rl692htMg0mnTpKTI/v2ydy5Ul4uy5bJyJGyYUNzL9v8EQ0OMR9GLQ888ECYrQnF2sD43e9+9+jRo0OHDl23bt3q1atjY2NtbCJEPPKIDBwov/pVE3980CCZNk1SUuTAAfmnf5K2bW3tHAD4n7/5m7+xtkDY5eLFi+np6UlJSdu2bbNK5RcWFk6bNs3GJvzQ8OGyYYO43TJwoJSUyPz5smiRHDvWrGs2c0SDQ/wljNro0qVL6enpI0eOXL9+fUxMTHZ29v79++fPn2+6X4GqRQtZvlz+9Cc5evRbP7Nli3xTjOsO/vxnWb1agvHpHQCcVVVVlZOTExsb+/LLL4eHh6elpR09ejQ9Pd3aAhEKFi2SAwfkl7+Utm1lzRpJTpavvmr61Zo/osEJQRVGb968uXLlyqFDh7788ssikpqaeujQofT0dOsMJzTZY49Jz56SldXEH7f1BQEAhIqCgoKxY8cuW7bsypUrc+bM2bt3b05OjnWGU0hp3VqefVYOHRKXS556SqKimnW1Zo5ocELwhNFNmzYlJCQsXb
r0/Pnzs2bN2rNnT25ubteuXU33KxhERsrTT8ubb8qpU6a7AgAhoLS0NCUl5f777z9w4MCQIUNWr16dn58fFxdnul8m9eolb74pzzzT3OswovmhYAijR44cefjhh2fPnr1v3z6rVP7GjRtHjhxpul9B5Yc/lJgY+dd/Nd0PAAhqly9fXr58+ciRI9euXduxY8esrKxPP/00JSXFdL+CCiOavwnsMFpRUbFixYr4+Ph33nnHKpW/b98+7xlOsFHr1vLTn8prr8nZs6a7AgDBqKam5s033xw6dOgLL7xw8+ZNl8t16NChjIwMe/dCQRjR/E8Ah9Hq6uqEhIRf/OIXVVVVTzzxxJEjR1asWNG6dWvT/QpaP/qRREXJjh2m+wEAwejxxx9/7LHHzp07N2PGjD179rz55ptWyVI4gRHNrwRwGG3RosUPf/jDpKSk7du3v/766z169DDdoyAXHS3p6aY7AQBB6kc/+lHfvn3z8vI2b948evRo090JcoxofiVMVc32oHv37ufOnfviiy+sE0Ebpbq6Ojw83N4CpbinV16Rv/97efJJeeUV010BAH/y4IMP/vnPf37vvfeaVvH+5s2blH/xsb17JSFBxoyRvXtNdyWEBfY/+tAptAYACHokUYSmAJ6mBwAAQKAjjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKACg0aqqqrZv3266FwCCAWEUANBor7zyytSpUxctWnTs2DHTfQEQ2AijAIBGCw8Pb9u27Zo1a+Li4n7+859XVFSY7hGAQGU+jCYmvjR1am5YWHvTHQEANFR6enppaWlqamplZeXzzz8/ZMiQlStX1tTUmO6XYX36/Hjq1NxWrcaa7ggQSMyH0d27//aDD1JVo0x3BADQCD179szNzd25c+ekSZPOnDmzdOnS5OTkjz76yHS/TDp58v4PPki9caO/6Y4AgcR8GAUABK7ExMTt27evWrWqb9++u3fvnjJlysMPP1xWVma6XwACRoTpDgBoisWLF7/33numewHUp6rvvPPOhQsXNm3aZLovAAIDb0YBAABgDG9GgYD07rvvmu4C4KGq//M///MP//APZWVlYWFhDz300Isvvmi6UwACBmEUANB0u3fvTk9Pt/YtJSYm5uTkTJo0yXSnAAQSpukBAE1Rdwd93Z31pvsFIMDwZhSN1rbtgenTz3ft2lYkyXRfAJiRk5Pz7LPPVlRUtG7d+qc//ekzzzzTtm1b050CGq1Fi3PTpx/s3buFyFTTfQldvBlFo1VUbN66deb583mmOwLAGFWtqKhISUk5cODAL3/5S5IoAlR19emtW2ceOJBmuiMhjTejAIBGe/LJJxMTE6dMmWK6IwACHm9GAQCNFhkZSRIFYAvCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjMIGlZWVprsAAIANGNF8z3wYHT1aEhIkMtJ0P9BUJ0+eHDBgwAsvvMANDCDExcZKQoLExJjuB5rq5s2b48ePX7p06fnz5033JYSYD6N/+YsUFUmnTjZcat8+IQ753h/+8IczZ84sX758zJgx77//vunuAIAxL74oRUUyY4YNlzpxQi5csOE6aJRt27bt379/5cqVw4cP/93vflddXW26RyHBfBi1y9WrMneuxMXJO++Y7k
qI+cd//Mf8/Py4uLji4uJ58+bdf//9Bw4cMN0pAAhsS5fK4MHywgty44bproSSWbNm7d+/f968eRcvXvzxj38cHx+/fv16050KfsbC6PLlEhYmCxfe8sX/83+a/kB5+rTExMiRI/Lww/Kd7whxyJfmzJmzd+/e7OzsDh06FBQUjB07Nj09/cqVK6b7BQC+YPuIVlEhYWFy+bIsXy5jxsiGDc3uIhps2LBh69evd7vdAwcOLCkpWbBgwaJFi44ePWq6X8HM8JvRdetk7157LjVsmHzyieTmSteukp8vY8bI0qXCkg+fiYyMTE9PP3r0aFpaWk1NzcsvvxwbG5uTk8McB4AQYeOI1ratrF8v+fkSFyclJTJ/vtx/Py9ZfGrRokXFxcXZ2dnt27dfs2bN8OHD09PTv/zyS9P9Ck4mw2i3bhIXJ88/b9sFIyIkNVUOHZK0NBGRlStl6FDJyZGbN21rAnfXuXPnnJycwsLCadOmXbx4cdmyZUlJSdu2bTPdLwBwlu0jmojMmSN790p2tnToIAUFMnaspKfL5ct2NoG7aNmyZXp6eklJSWpqanV19csvvzxs2LCVK1fW1NSY7lqwMRlGw8LkmWfkvffk4EE7LxsTIzk5sm+fzJ8vly7JsmUSHy8s+fClsWPHbt261e12DxgwYO/evdOnT1+0aNHx48dN9wsAnOLQiBYZKenpcvSopKVJTY28/LLExkpOjjDn5DP33Xdfbm7url27Jk+e/Pnnny9dunT8+PHbt2833a+gYnia/pFHZOBA+dWv7L/ysGGybp243RIbK4cOyYIFsmiRsOSj+a5evep2uxvyyUWLFh04cCArK6tdu3Zr1qyJi4tbvnz51atXne4hABjh3IjWubPk5EhhoUybJuXlsmyZJCYKc07NV11d/d577zXkk+PGjfvggw9WrVrVr1+/oqKiadOmPfzwwydOnHC6h6FCDcnI0O7dVVVffVVbtNAjR1RVH3hAp0+3uaHKSs3O1vbtVUQjIzUtTa9csbmJEFFdXf3GG2/06NFDRIYPH15dXd3AHzx16pTL5QoLCxORnj175uXl1dTUONpVAPAln41oqup264ABKqIimpKix47Z30SI2LJly5gxY0SkdevWJ0+ebOBPVVRUZGZmRkVFiUibNm0yMzOvX7/uaD9DgfkwWlmpffroD36g6titq6pnzmhqqrZooSLapYtmZ+vNm440FKx27do1ceJE6wEmKSnpo48+8v0VAMA/+XhEu35ds7K0XTsV0agozcjQL790pKFgdfLkSe8rkt69ezfhFUnzr4C6zIdRVX35ZW3ZUk+e9Ny6587pSy/pjRv2N1pUpFOmeB4oExL0gw/sbyL42Phes6amJi8vz3q3GhYWtmTJkrKyMnt7CwC+d5cRraZG/+Vf9MIF+xs9fVpdLg0LUxHt2VNzc7XB81Why973mt53qyIyffr0vXv32tfT0OIXYfSrr7R7d/37v/fcuqmpKqKDBumqVY407XZrv361cxzHjzvSShC4fv26teJTRKKiojIyMr604+n72rVrmZmZrVu3FpG2bdtmZmZ+9dVXzb8sAJhylxHtv/5LRbRjR83KcuQly65dOnGiZ0RLStIPP7S/ieBQU1Njrfj0vg357LPPmn/Z6urqvLy87t27i0h4eLjL5friiy+af9lQ4xdhVFX/9V81KkonTNDp03XDBh0+3HNrzZ2rBw/a33pFhWZlaXS0imibNpqRoVev2t9KQLP2wlsPfCkpKcfsXpd05MiRJUuWWNfv06dPXl6evdcHAJ+5y4h24IB+5zueEW3ECH3/fftbr6nRvDzt0UNFNCxMlyxR5pzq2b179+TJk60Rx9qKZO/1L126lJGR0apVKxGJjo7OzMz8+uuv7W0iuPlLGL16VTt1UhHPCpuqKs3N1S5dVEQjIjQ1Vc+ds78PJ0/WznH07q15ecqSD1Xds2fPtGnTrJvWKtLkXFsbN24cNWqU1dbMmTM/+eQT59
oCAIfcfURT1fx8HTHCE0nnzNEDB+zvw7VrmpmprVuriLZtq5mZypyTqp45cyY1NTU8PFy+KdLU8N23jVVaWpqSkmKNaIMHD17l0PRuMPKXMKqqv/jFLbeuql68qGlpGhGhItqpk2Zna1WV/T3ZsUOTkz2/I5KT9a9/tb+JQHHhwoW0tLQWLVqISOfOnbOzs2/W2ee1f//+bdu22d6oNcfRtWtX7xzH2bNnbW8FAJzTkBHNKu3SoUNtaZfLl+3vSVmZulyeEa1PHw3lOafKykrr/CQRiYyMTEtLu1KnmM7Vq1ffeustJ3Yd5efnx8XFWZF0zpw5+/bts72J4GMsjDZccbHOm+e5tYYN03Xr7G+iulpXrtRu3axXpFVLlz4ZannIumk7dOjgvWkv3/Zrcvr06Q5N2atqeXl5RkZGy5YtRSQmJiYrK+uGE6urAMCoCxc0Lc1T2qVzZ6dKu+Tna3y8Z2rxkUf+MQTzkNvtjo2N9a40O2JV26pj+fLlIpKYmLh9+3bbW6+srMzNze3SpYs1pKampp4/f972VoJJAIRRi9utAwfW7jq67d+VDS5f1qee0ilTXhaRDh06vPjiiyGSh+o9xu3fv//2z1RVVa1YsaJNmzbeHYgVFRW296SkpGTBggVWT4YOHbp27VrbmwAA4/bs0alTPSPa2LHqxGKoqip95RWdP79QRCIiIp588skLTmzp9z/FxcXz5s2zxpFhw4at+5Y3WG+99VbPnj2tzUwul+v06dO29+TixYveycZOnTrVm2xEXQETRlX1xg1flK8vLS31bqwJ+jUfhw4dWrhwofWHHTJkyOrVq+/++bplnnr16uVQZbX8/PwRI0Z4w/EBJ1ZXAYBpbrf27+9s+fqQmnSywl9ERIQ3/FXddW2fVebJKu1ivWRxorTLwYMH586da41ow4cP37Bhg+1NBIFACqMWq3x9eLiK6H33OVVZraCgID4+3vrXM3v27OCb47C2/lm/oTp27JiVldXwrX87d+50unx9Q5YNAECgs8rXW6VdnCtfH/STTlVVVd5p8YiIiNTU1HMN3vVcVlbmcrnqlnZx4iVLvQI1R48etb2JgBZ4YdSye7dOnux5oBw3zpHy9bf/4w6ONR/WhqFu3bo1Z8NQvfL1Lpfr888/t72rn3/++RNPPGHtgnxl4UJ9/XVqOgMIPqdO1ZZ26dXLqdIuwTrpVFBQMHLkSOvPNWvWrE8//bQJF9m8efPo0aOti8yYMePjjz+2vZ/Xr1//53/+57Zt24rI96ZN02ef1WvXbG8lQAVqGFXVmhpdtcpTvt6qrGZH/dr6bl/zcffX/n7O3vvNKl9vVVZzrnz97t27H33wwcrISAefPADAtHrl6504MjnIJp0OHz7sXVY3aNCgZi6rs+VNzT2dOnXq8Ucf/ax3b8+Txx/+QFFJDegwaqmo0MxMjYrylK/PzNRmnOz1rQ4ePFh3QfT69evtb8Nhzs1E2Pvr4FvVW13FwVkAgk698vUulzow53SPKn4B4fZXIXYVmW/OGrZGKCzUSZM8I1piIgdnBXwYtfimfL3b7R44cGDArfnwzRrtjRs3Nn+i5B5uX13FwVkAgo5vytcXFRVNnTrV+qWdkJDgRBlpJ1jvL51eJNbY3b1NYU3v9u1bO7174oT9rQSIIAmjli1bdMwYz5PG9Om6d6/9Tdy4ccNbRLdly5b1iuj6G+so3r59+8o3R/GecPLfurXK1ipf39gl5I3gm9VVAGDU4cO6ZIlnRBs0SB2bc3L379/f+5LluH9POu3cuXPChAlWb8ePH/9Xh0+paUjdw+aq++RhTe+G5MFZQRVGVbW6WvPytHt3FdHwcHW59Isv7G/Fl8eLNVlhYeGkSZOsu8ihur53VF5e7i2uERMT49Qq2507dcIEz+/p8eND+uAsAMFr40YdOdLzq27WLHXiyOTr169nZWVFR0eLSFRUVEZGxlX/m3TyTWHB2/lolW3IH5wVbGHUcumSZmRoq1Yqot
HRmpmpziz5KJw8ebKV9saNG+eztHdPp0+fNp6Vi4uL58+f760k8m1lh5vFN6urAMCoqirNzdWuXT0nKqWmqjNzTmbS3j1VVFQYz8o+WmW7aZOOGuWJpDNmqAM7+v1WcIZRS2mppqR4/rMOHuzIHMft8+CfObGlv8H8bRXBPQ9ks4E1x2E9eVirq5x48gAAo8rLNS1NIyJURGNiNCtLnahe7+N58Hvyq1UEe/bsmTZtmtWZMWPGbHXi4Cxretc6ndya3g2Nw8mDOYxa8vM1Ls4TSefMUSeq19++Q+i6E1v678U/91dZcxxWPrbmOBzJx75ZXQUARpWU6IIFnl91Q4eqE9XrfVNG+p78dn9VvfL1x5w4OMua3m3ZUkW0Y0ennjz8SfCHUVWtrNTcXO3SxXOOaGqqOlG9/tixY4sXL7b+gfbv37+goMD+Nr5FvdPG/LDylLXK1prjsFYOODLHUVBwy+oqJ3b0A4Bp+fk6fHjtS5aDB+1v4sqVK08//bRV4ah9+/a//e1v7W/jW/h/5SlrlW27du28Kwe+dOLgrEOHdOFCz3/mIUN0zRr7m/AbIRFGLefOaWqqtmihItq1q/7Xf11x4t+3VVU+LCzsA5/UZr+9Jr+/3bR17d69e8qUKd4nXUf+iqzVVdaTh3OrqwDAqK+/1qwsbddORbRVK3322Uon8pC3jPSvf/1r2y9+u8CqyX/69GnvKtuePXs6tT0jP19HjKh98nBiR78fCKEwajl4UOfOVRFNSMgYPnz4hg0bbG/i5s2b+fn5tl+2ngA9rdRaZduvXz/vHIcjq2wvXqxdXdWpk2ZnayCfmwUAd3T+vKalaYsWOmNGgXMvETdu3HjD+WniAD2tdNeuXRMnTrS6nZSU9KET5esrKzU7Wzt08EzvpqWpH2f0pgm5MGr53/+96F0T/dBDD/l5ZbXb5efnx8fHW/2fPXv2PidWwjrJWmUbFRVlrbJ1andkcbHOm+d5oBw2TJ3Y0Q8AphUW1sycucD3hfzsUlJSsmDBAm/1lbVOrIR1Ur1VtkuWLCkrK7O/mQsXPE8eItq5s2Znqx9PhDZWiIZR/WbjubXmw9p47siaD7uVlpZ6z94cPHiwU2dv+sTJkye9cxy9e/d2qpKI260DB9aeI+rEjn4AMM2vNp43UHl5uffszZiYmKysLB+8gnWIdUKptZXZOqHUicMOdc8enTbNM6KNHatO7Og3IXTDqKVuSU4H13zY4erVq96jeKOjo208itesLVu2jBkzxvoFOm3atItOVFa7cUOzs7V9+9o5Dj8+NwsAmqZuSU4HJ53sYJ3qaZ3YFx4e7nK5HDmxz+eOHDnywAMPWCNabGzsHofe8rrd2r9/7UsWJ3b0+1aoh1FLvcOKHFnz0QzWTdu9e3fvTfuFE+dKmeP9A06Mj69p0cK5g7M0NVXDw1VE77tPc3PVXx88AKDJ6k46+VX5eq9NmzaNGjXKGnNnzpz5iRPnShll/QFjOnY836mTzpzp0MFZtVvYoqI0I0MDYXb32xBGPXx8jHvD7dixIzk52bppk5OTd+zYYbpHTrl06dLRFSs0MlJFtEMHfeklray0v5ndu3XyZM8D5bhx6pOiBwDgYzt27PCr8vWWEydOuFwuq1d9+vTJC95zL6uqqj554w3t1MlT2uXJJ/XCBfubOXVKXS4NC1MR7dVL8/LUzx48Gogweou6az6s8vWOrPloGB8tqfQ3paW15esdOzhLV63Sfv0854guWaJGz80CACf41ayaj5ZU+pvy8try9c4dnLVrl06c6Bk3k5L0o4/sb8JhhNE7KCsrM/voFkDrfpySn6/x8Z5ba/Zshw7O0sxMjYpSEW3TRjMz1cS5WQDgqLr7DawU6OP9Bj7abO7PfHJwlublaY8enpcsLpeaODeryQij36ruopYZM2Z87MTGmtv4qAxnQLi9fL0ThVSPH9eHHvL8jkhOtv/6AOAHTFViqVeG86MAfGlnm3rl650opHrliv7sZ5
4XsR06OLIwwBmE0bux5ji6devmneM4e/asc8354oCigGOVr7cqqzlXvn7LFh0zRl9/3f4rA4DfKCgo8FmN6noHFIXKSrO78035+sOHdckS/f737b+yYwij93bp0iVvIbSOHTs6UQit7tHtXbp08fNTPQ3wHpxlla9fv97+Jm7eZHM9gKDng9P7fHR0e+DyTfl6J3YAO4Yw2lCHDh1auHCh9UA5ZMiQNWvW2HJZ6yje9u3be4/ivUIJzG9Tr3z90aOmOwQAAenixYtpaWkREREi0qlTp+zs7CqbJp3cbveAAQO8K82OBX4JTKcUFenUqZ4RLSFBt20z3SGTCKONU+/w3P379zfnam63OzY21nvTHuFwoHuqW76+ZUvK1wNAkxUXF8+bN88ag4YNG7a+eZNOe/bsmTZtmnW1sWPHbg2Ww4GcVa98fSCcm+UEwmijWe8yO3To4H2Xebnxaz6Ki4vnz5/vPYp3HcemN8rp05SvBwBbuN3ugQMHel+LHG38pNOFCxfS0tKslWadO3dmpVnjWOXro6Nry9eHWv0cwmiTNfneKy8v906OxMTE2Dg5EnIKC28pX799u+kOAUBAunHjhnfBWMuWLRu+YMyWtzNQDZ7y9U1DGG2WRs1KWMvGraN4rWXjwXEUr0lW+fq+fZtbvr6mRl99VRMTtU0bbd9eZ83SDRvs7isA+DVrK214eLiI3Hfffbm5udV3nXTKz8+Pi4uza90aVFV37tQJEzwvWcaP1yafmxVogxph1AZut7t///53X6+9cePGkSNHWp+ZNWvWp59+6vt+Bi2rfH3r1rXl6xt7sMfjj2tYmP7d3+k77+gf/+jZuf+b3zjTXQDwX4WFhZMnT7ZGq3Hjxm2/06RTvR29q1ev9n0/g5Yt5esDbVAjjNrjLpUsDh8+7C01PGjQIJ+VGg45ZWXqcnkeKHv3bsQcx9tvq4hmZ9/yRZdLIyMdKUoMAP7NOn6lb9++3jOTvMev3F7r0MfnOYWKa9c0M1NbtVIRbdtWMzO14X/PATioEUbtdOrUKW+N3169eq1cufK5556rewhbSBzFa9bmzTp6tCeSTp+uDTk3a8IE7devfi39sjKNjNQf/9ihbgKAn6uoqPCeJt+mTZvnnnvutdde89kpMFD9pny9NaINGqQNfJkVgIMaYdR+27dvHzdunIhY25vCw8OfeOKJzwPqlNjAVl2teXnarZuKaHi4ulz6xRff+uHr17VFizvfnxMmaHy8c90EAP937NixxYsXW5N71qsWn52PDY+CAh050hNJZ83Suy/zC8xBLUxVBXarqan5/e9/f+bMmZKSkmXLliUmJpruUei5fFmysiQ7W27ckI4dZflyWbZMWrWq/7HPPpMBAyQrSzIy6n/rkUfk/ffl0iXf9BcA/NbmzZvfeuutTp06JScne7MpfOfmTXnjDfn5z+X8eYmIkCeekF/+Urp2vcMnA3NQI4wiqJWWyk9/KmvWiIgMGSIvvSQpKbd8wLpvX3hBfvaz+j/7yCPyl79IebmPugoAwF2Ul8svfiH/8R9y86bExEhmpjz5pERE3PKZwBzUwk13AHDSkCGyerWsWSNDhkhpqSxaJAsXSmlp7Qe6d5cWLeTEiTv87IkT0quXz3oKAMDddOokOTmyZ4/MmiWXLsmyZZKQIJs23fKZwBzUCKMIAQsXyv79kp0tHTrIunW3hNGoKElKkrVr5ebNW37k5EkpKpJvisgCAOAXRo6UjRvF7ZbYWNm3TwoKbvluYA5qhFGEhshISU+X0lL59a/rz9Snp8uJE/Lb397yxWefFRF58knf9RAAgAZatEgOHJCXXpJnnqn/rQAc1FgzCoj83d9JXp48+qjMny83bsgf/yj5+fKb38hPfmK6ZwAANFKgDWqEUUBEVV5/Xf7zP+XgQYmMlMRE+dnPZO5c090CAKDxAm1QI4wCAADAGNaMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjC
GMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACMIYwCAADAGMIoAAAAjCGMAgAAwBjCKAAAAIwhjAIAAMAYwigAAACM+f/n03IfKxdEPwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<PIL.PngImagePlugin.PngImageFile image mode=RGB size=900x300 at 0x7F1308344978>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rxn_str = f\"{Chem.MolToSmiles(mol)}>>{Chem.MolToSmiles(canon)}\"\n",
"rxn = rdChemReactions.ReactionFromSmarts(rxn_str, useSmiles=True)\n",
"Draw.ReactionToImage(rxn, subImgSize=(300, 300), useSVG=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I am not a chemist. Is this the expected behavior? \n",
"\n",
"I could find examples of molecules with allenes within rings, but only for larger rings:\n",
"\n",
"https://chemistry.stackexchange.com/questions/47568/do-allenes-form-rings "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f1303e10940>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.MolFromSmiles(Chem.MolToSmiles(canon))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"No error is raised during the parsing of the molecule."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Considered Tautomers"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<PIL.PngImagePlugin.PngImageFile image mode=RGB size=600x200 at 0x7F1303F3C198>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def pairwise(iterable):\n",
" \"\"\"Utility function to iterate in a pairwise fashion.\"\"\"\n",
" a, b = tee(iterable)\n",
" next(b, None)\n",
" return six.moves.zip(a, b)\n",
"\n",
"\n",
"class TautomerScorer(TautomerCanonicalizer): \n",
" \n",
" def score(self, t):\n",
" \n",
" score = 0\n",
" # Add aromatic ring scores\n",
" ssr = Chem.GetSymmSSSR(t)\n",
" for ring in ssr:\n",
" btypes = {t.GetBondBetweenAtoms(*pair).GetBondType() for pair in pairwise(ring)}\n",
" elements = {t.GetAtomWithIdx(idx).GetAtomicNum() for idx in ring}\n",
" if btypes == {BondType.AROMATIC}:\n",
" logging.debug('Score +100 (aromatic ring)')\n",
" score += 100\n",
" if elements == {6}:\n",
" logging.debug('Score +150 (carbocyclic aromatic ring)')\n",
" score += 150\n",
" # Add SMARTS scores\n",
" for tscore in self.scores:\n",
" for match in t.GetSubstructMatches(tscore.smarts):\n",
" logging.debug('Score %+d (%s)', tscore.score, tscore.name)\n",
" score += tscore.score\n",
" # Add (P,S,Se,Te)-H scores\n",
" for atom in t.GetAtoms():\n",
" if atom.GetAtomicNum() in {15, 16, 34, 52}:\n",
" hs = atom.GetTotalNumHs()\n",
" if hs:\n",
" logging.debug('Score %+d (%s-H bonds)', -hs, atom.GetSymbol())\n",
" score -= hs\n",
" return score\n",
"\n",
" \n",
"logger.setLevel(logging.INFO)\n",
"te = TautomerEnumerator()\n",
"tautomers = te.enumerate(mol)\n",
"ts = TautomerScorer()\n",
"scores = [str(ts.score(t)) for t in tautomers]\n",
"Draw.MolsToGridImage(tautomers, legends=scores)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding a penalty to the score"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-03-13 09:47:19,897 | DEBUG : Applied rule: oxim/nitroso r to O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,897 | DEBUG : New tautomer produced: O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,899 | DEBUG : Applied rule: oxim/nitroso f to O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,899 | DEBUG : Previous tautomer produced again: O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,901 | DEBUG : Tautomer: O=C1C=NC=C[N+]1=O\n",
"2020-03-13 09:47:19,902 | DEBUG : Score +2 (C=O)\n",
"2020-03-13 09:47:19,903 | DEBUG : Score +2 (N=O)\n",
"2020-03-13 09:47:19,903 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,904 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,905 | DEBUG : New highest tautomer: O=C1C=NC=C[N+]1=O (6)\n",
"2020-03-13 09:47:19,906 | DEBUG : Tautomer: O=C1C=NC=C=[N+]1O\n",
"2020-03-13 09:47:19,907 | DEBUG : Score +4 (oxim)\n",
"2020-03-13 09:47:19,908 | DEBUG : Score +2 (C=O)\n",
"2020-03-13 09:47:19,909 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,909 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,910 | DEBUG : Score +1 (C=hetero)\n",
"2020-03-13 09:47:19,911 | DEBUG : Score -300 (bug)\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f1303e108f0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"logger.setLevel(logging.DEBUG)\n",
"\n",
"tc.scores = (TautomerScore('benzoquinone', '[#6]1([#6]=[#6][#6]([#6]=[#6]1)=,:[N,S,O])=,:[N,S,O]', 25),\n",
" TautomerScore('oxim', '[#6]=[N][OH]', 4),\n",
" TautomerScore('C=O', '[#6]=,:[#8]', 2),\n",
" TautomerScore('N=O', '[#7]=,:[#8]', 2),\n",
" TautomerScore('P=O', '[#15]=,:[#8]', 2),\n",
" TautomerScore('C=hetero', '[#6]=[!#1;!#6]', 1),\n",
" TautomerScore('methyl', '[CX4H3]', 1),\n",
" TautomerScore('guanidine terminal=N', '[#7][#6](=[NR0])[#7H0]', 1),\n",
" TautomerScore('guanidine endocyclic=N', '[#7;R][#6;R]([N])=[#7;R]', 2),\n",
" TautomerScore('aci-nitro', '[#6]=[N+]([O-])[OH]', -4),\n",
" TautomerScore('bug', '[*;R1]=[*;R1]=[*;R1]', -300), # this smarts is not specific enough\n",
" )\n",
"\n",
"tc.canonicalize(mol)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Of course this work-around is sub-optimal:\n",
"\n",
"- the SMARTS applied here is not nearly specific enough and side effects are to be expected in other molecules\n",
"- maybe it would be wiser to prevent the enumeration of such tautomers instead of applying a penalty to the score?"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment