Skip to content

Instantly share code, notes, and snippets.

@jose-manuel
Last active August 28, 2019 16:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jose-manuel/04d69dd3ac52cca74449e73d614df42e to your computer and use it in GitHub Desktop.
Save jose-manuel/04d69dd3ac52cca74449e73d614df42e to your computer and use it in GitHub Desktop.
Possible inconsistency between the SMILES and the drawing of a molecule after Murcko scaffold extraction
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Inconsistency with a molecule smiles and its drawing"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import rdkit\n",
"from rdkit import Chem\n",
"from rdkit.Chem.MolStandardize.metal import MetalDisconnector\n",
"from rdkit.Chem.MolStandardize.charge import Uncharger\n",
"from rdkit.Chem.MolStandardize.normalize import Normalizer\n",
"from rdkit.Chem.MolStandardize.tautomer import TautomerCanonicalizer\n",
"from rdkit.Chem import rdmolops\n",
"from rdkit.Chem.Scaffolds import MurckoScaffold\n",
"from rdkit.Chem import Draw"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Init"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def standardize(mol):\n",
" # disconnect metals\n",
" mol = metal_disconnector.disconnect(mol)\n",
" # remove isotopes\n",
" for a in mol.GetAtoms():\n",
" a.SetIsotope(0)\n",
" # normalize functional groups\n",
" mol = normalizer.normalize(mol)\n",
" # remove charges\n",
" mol = uncharger.uncharge(mol)\n",
" # enumerate canonical tautomer\n",
" mol = canonicalizer.canonicalize(mol)\n",
" # remove stereo\n",
" rdmolops.RemoveStereochemistry(mol)\n",
" \n",
" return mol"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"metal_disconnector = MetalDisconnector()\n",
"normalizer = Normalizer()\n",
"uncharger = Uncharger()\n",
"canonicalizer = TautomerCanonicalizer()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cn1cnc2c(NO)ncnc21\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE515400>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = Chem.MolFromSmiles('Cn1cnc2c(NO)ncnc21')\n",
"print(Chem.MolToSmiles(m))\n",
"Draw.MolToImage(m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Without standardization"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c1ncc2nc[nH]c2n1\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAUbUlEQVR4nO3de3BUhdnH8R+BEG5pMDANCNhSLgYM1eEWsFawRS0aiiNCneIGiiGIM4Cm1ntJrFLSajGRqRipDgGtCEhtUNOKr1pinSBCRVQkYJFBoCBSJU24hPC8f5y2lhY0geyePMn3M5NRN+fseRKGr2f3XLaFmZkAwIG4sAcAgLoiWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWHBr5EipRQvp9ddPfLxLF+nZZ8OYCNFGsOBap07SLbeEPQVihWDBtawsaft26Zlnwp4EsUCw4FqHDlJurnTHHVJNTdjTINoIFtzLypJatpSKisKeBNFGsOBeq1ZSfr50zz3SwYNhT4NoIlhoEsaOlfr1C8KFpotgOVHXQ/gjR0p33/2/63fvLj3xRBQHbATuv18qLJQqK8OeBNFCsBxpzofwzaQlS6SrrpKOHz/5Munp0pVXStXVJz5eVSWNGiWVlkZ9TEQZwXKkuR7CX7tWGj5cmjlTGjEiiJckvfrq/+5NLlsWfP+qqz5/rEWLIGZXXx0EbcuWWE2OhkawHGluh/B375amTZMuukjq21d6/33p5puDI4L10a6dNGeOVFER7KUOGCDNmiV9+mlUxkYUESxn6nIIf948qXPnE7/27IndjGfq6NHgvah+/YK9oXXrpMWLpZSUM3veHj2C53nxRelPf5J69Qq2U1vbMHMj+giWM3U5hD91qvTWWyd+ffWrsZvxTKxaFYTqgQek+fOlV16RLrigYbcxcqS0YYP04IPSz38uDR4cBAyNH8Fy6MsO4ScmBkcF//Orvi+jYm3zZmn0aOnaa6VIJHj5lpkZvP8UDXFxwfNv2SJdfrl02WXSmDHBe4RovAiWU03lEP6BA8H7SeefH4T2vfekvDypbdvYbL9jxyD8mzYFb9anpgbzeP+9NlUEy6lTHcL3oqamRo88sk+9e0vl5cFLsmXLpK99LZx5+vaVnnsuOAJbWir17y+tWPGJ7F+HJNEotDD+RJqlo0el1q3D2fYf/vAH5eTk6OyzL1Ak8ltFIsFLtLqtK/3610FYojX/0aPSQw9JixePV2LibhUUFGjIkCHR2RjqhT2sZio/X/r614OXP6+99vm5TdG0detWTZgwQWPHjtWll16qlSsf0aRJdYtVRYWUkRGcX5WWFt0je61bByfovvLKIxo8eLAuvPBCTZgwQTt37ozeRlE3hmbpk0/MiovNMjLMWrc269HDLDvbrKTErKamYbdVWVlpubm5lpCQYBkZGbZt27Z6rGuWm2uWkBDM+sEHDTtbXWzYsMEuvvhia9++veXm5tqhQ4diPwTMzIxgwf7+d7Nly8wiEbP27c06dQr+vaTE7OjR03/e2tpaKy4utpSUFEtNTbXS0tJ6rBsENSXFLDXVrB6rRk1JSYn17NnTev
ToYcXFxWGP0ywRrEaqqir4SxtrBw+aLV1qNn58EK8rrvg/mzJlij3//PN2+PDhOj9PeXm5paenW3JyshUUFFhNPXbbysvN0tPNkpPNCgoafo/vTFRXV1t+fr4lJibaJZdcYhs3bgx7pGaFYDVS11xjlpMT7gzV1Wa///3LNnHiREtKSrKkpCSbOHGirVy50qqrq0+6zs6dOy0SiVh8fLxlZ2fbxx9/XOft7dwZ7NnFxwcvT/fta6ifpOF99NFHFolErGXLlhaJRGzv3r1hj9QsEKxG6JFHzDp0MNuyJexJPnfs2DErKyuzmTNnWkpKirVt29YyMjKsuLjYPvvsM6uqqrL8/Hzr0KGDffe737W33367zs9dVVVls2fPtgsueNMuu8zs3Xej+IM0sDfeeMOGDx9uZ511luXn59uRI0fCHqlJI1iNzLvvmrVrZ/bEE2FPcmo1NTW2evVqmzZtmqWkpFibNm2sd+/e1qdPHyspKanz8xw/ftyefPJJ6969u/Xp08deeGFNFKeOntraWnvsscesS5cuNm7cODt6Jm/84QsRrEbk8GGz8883mzw57Enqrra21ubOnWvJycn12rtYv369XXTRRf8+8laf98caqz179pgk27x5c9ijNFmch9WI/PjHwc3mHnoo7EnqLi4uTmlpaYqPj1frOpzJuX//fs2aNUvp6enq2bOntm3bpry8PCUkJMRg2ujq0KFD2CM0ea3CHgCBF16QFi6U1qwJrqlrampqavTwww9r9uzZSk1NVVlZmYYNGxb2WPW2ceNGVVdXa/jw4WGP0iwRrEZg1y5p0iRp7tzgGsGmZtWqVbr55pt1+PBhzZ8/X5FIRC2idRuGKFuyZIk++ugjghUSXhKG7Pjx4DYngwYFd9NsaoqLi3XttdcqEomooqJCmZmZbmOF8LGHFbI5c6R33pE2bozevZ/CNGHCBH3nO99Rjx49wh4FTQDBCtEbb0j33SetXBl8XFdT1LZtW2KFBsNLwpB8+mmlIpHDmjUruK8VgC9HsEKSnX29+vW7QXPmhD0J4AcvCUOwcOFClZaWav369YqPD3sawA/2sGJs69atysnJ0YIFC9S3b9+wxwFcIVgxdOTIEU2YMEFXX321rrvuurDHAdwhWDH0k5/8RJWVlZo/f37YowAu8R5WjJSWlqqoqEhr1qzRV77ylbDHwWlKTOyu5OSTXzPZokVLpaYOVFxcmxhP1XwQrBjYtWuXMjMzNWfOHKU3xWtvmpHKypt04MDJv2fWVu+/v17Hj8d2puaEl4RRdvz4cU2aNEkDBw5UTk5O2OMArhGsKJs7d642bdqkRYsWKe4/Ps/q2LFjWrFiRYiTAf7wkjCK1q1bp5/97Gd65pln1LVr1xO+t2PHDk2ePFmHDh1SJBIJaULAF/awoigrK0s33nijMjIy/ud7vXr1UkFBgW688UZt2bIlhOkAfwhWiLKysjRmzBhNnDhRR48eDXscoNEjWFH0m9/8Rg8//LCee+65Uy6zYMECHThwQHfffXcMJwN8IlhRNGTIEM2ePVvXX3+99uzZc9JlkpKStHTpUhUWFur555+P8YSALwQryu644w4NGDBAkydP1vFTnKAzdOhQ/fSnP9WUKVP0t7/9LcYTAn4QrCiLi4tTcXGxNmzYoHnz5p1yuTvvvPPfYTOzGE4I+EGwYqBbt25avHix7rrrLq1du/aky/wrbOvXr//CsCFcbdtK7dqFPUXzRbBiZPTo0Zo2bZomTpyogwcPnnSZbt266bHHHlNpabrWr4/xgKiTe++VHn887CmaL4IVQ/fff78SExM1Y8aMUy7z/e9/X/36XaQf/EA6RdeAZotgxVBCQoKWLVumlStX6oknnjjlcg88EHyY6syZMRwOZ6xVK+nSS6X27cOepOkiWDHWp08fzZs3T9OnT1dFRcVJl0lIkJ58Ulq+XPqCrqGRadNGevFFiQ8Jih6CFYKpU6dq9OjRuvXWRaqpOfky/ftLDz4oTZ
8unaJrQLNDsELy6KOPafPmObrrrlMvk50tZWRIEydKXLkDEKzQdOyYqCVLWqiwUPqiE9wXLJD275dmz47dbEBjRbBCNHSodPfd0pQp0qlOcO/YUXr6aamgQFq9OpbTAY0PwQrZXXdJaWnS5MnSqU5wHzo0WG7SpGBvy5sZM2bo9ttv1z/+8Y+wR4mKkSOlFi2k118/8fEuXaRnnw1joqaLYIUsLk5avFhavz54k/1U7rxTmjNHOuus2M3WUEaNGqUVK1bovPPO09NPP90kLz3q1Em65Zawp2j6CFYj0K2bVFws3XGHdIord9SypfSjHwX/9Gbs2LHavHmzcnJylJ2drWHDhqm8vDzssRpUVpa0fbv0zDNhT9K0EaxG4oorpKlTpeuukyorw56m4cXHx2vWrFn64IMPNGzYMH37299WZmZmk7k7RYcOUm5u8D+dU52qgjNHsBqRX/0qOEva0xnutbW1euedd1RTU1Onu6Z27txZhYWFWrt2rbZv367evXsrLy9PR44cicG00ZWVFewBFxWFPUnTRbAakYQE6be/lZYtC850b6yOHTum1atX64YbblC3bt10zz33KDk5WWlpaVq1alWdnmPgwIEqKyvTU089pUWLFiktLU3Lly+P8uTR1aqVlJ8v3XMP14FGC8FqZPr3l+bNk264oXGd4V5bW6vXXntNs2bNUvfu3TV27Fjt2rVLv/zlL7V3715t3LhR119/vX74wx9q1KhR2rRpU52ed8yYMXrvvfeUlZWlKVOm1GvdxmjsWKlfvyBciAJDo3TNNWY5OeHOcOjQISspKbFIJGJJSUnWrl07y8jIsOLiYqusrDzpOjt37rRIJGLx8fGWnZ1t+/btq/P2zmTdMI0YYXbvvZ//d3m5Wbt2wdfvfhfWVE0TwWqkqqrMamvD2G7VvyOVmJhoycnJFolErKSkxA4fPlzn5ykvL7dhw4ZZcnKyFRQUWE1NTb3WTU9PP611o6262mzJkhMf++9gmZmNH28mEayGRrBgBw6YLVpklp0929q0aWNdunSx6dOn20svvXRGsaitrbXi4mJLSUmx1NRUe+GFF0573dLS0tOeoyEcP262dKnZOeeY9epl9vHHoY7TbBGsZmrfPrNHHzW7/HKz+HizHj3M7r23zNasWWO1DbxrV1lZabm5uZaQkGAZGRm2bdu2mKzbUDZsMLv4YrP27c1yc80OHYr5CPgngtWMfPyxWXGxWUZGEKmvfc1s5kyzsrJgDyLaKioqbPz48da6dWubOXOmffbZZzFZ93Tt3x/8flq1MotEzPbsifom8SUIlhMjRgTvifz5zyc+npLyxe+T7NhhVlBg9q1vmcXFmX3jG7GN1Mm89NJLlpaWZl27drWioqJ67dGdybp1dfRo8DtLSjIbMsTs9dcbfBM4TQTLiREjzDp1Mhs+/MTHTxas7ds/j1SLFmb9+wcvZd58Mzaz1kVNTY0VFRVZ586dbdCgQVZWVhaTdb/M6tXB7+vss4O90bCijpMjWE6MGGF2221mXbqYrVjx+eP/Haz77gsiNXSo2S9+YRbCWz718sknn9jMmTMtPj7exo8fbx9++GFM1v1vW7aYXXmlWdu2we/54MHTfipEESeOOlKX69UyM6UPPwwuor71VqlXr5iOWG/JyckqLCzU22+/rcrKSvXv3195eXk6dOhQndd98803tX//fg0dOlSHDx+u1/Y//VS6/XZpwIDgFjHvvhuc9JmYeJo/EKKKYDnzZder9eghnXNObGdqCKmpqSotLdXSpUu1ZMkS9e3bV4sXL67TrWi++c1v6uWXX9aaNWvUpk2bOm2vtrZWCxYs0LXXLtEf/xjcHHHVKqlnzzP9SRBNBMuZpn692pgxY7R582bdcsstmjFjhi655BK99dZbdVr33HPPrdNyL7/8sgYOHKi8vDyNG3dE69dLF198BkMjZgiWQ039erXWrVtr1qxZev/993XuuedqyJAhyszM1N69e8/oeXfu3K
nMzEx973vf08iRI1VRUaGpU7MUx98CN/ijcur++6XCwqZ576x/6dq1q4qKilReXq6//vWvp30rmqqqKuXl5alv377as2eP/vKXv6iwsFBJSUlRmhzRQrCcSk+XrrxSqq4Oe5LoGzRokMrKyvT4449r0aJFGjBgQJ1uRWNmWr58ufr376+nnnpKy5cv1+rVq3XeeefFYGpERdiHKYH6qKqqstzcXGvTpo2NGjXKNm3adNLl1q1bZxdeeKF17NjR8vPz63XhNhov9rDgSrt27ZSXl6etW7eqa9euGjhwoKZNm6b9//w4od27d2vatGkaPny4evXqpS1btui2225TQkJCyJOjIbQwa4IfYYJm49VXX9VNN92k3bt3KyMjQ8uWLdPQoUP14IMP6vzzzw97PDQwggX3amtrtXDhQu3YsUODBw/WuHHjwh4JUUKwALjBe1gA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANz4f8zphzx5iAzOAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52D7B8>"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ms = MurckoScaffold.GetScaffoldForMol(mol=m)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c1ncc2nc[nH]c2n1\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAUbUlEQVR4nO3de3BUhdnH8R+BEG5pMDANCNhSLgYM1eEWsFawRS0aiiNCneIGiiGIM4Cm1ntJrFLSajGRqRipDgGtCEhtUNOKr1pinSBCRVQkYJFBoCBSJU24hPC8f5y2lhY0geyePMn3M5NRN+fseRKGr2f3XLaFmZkAwIG4sAcAgLoiWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWHBr5EipRQvp9ddPfLxLF+nZZ8OYCNFGsOBap07SLbeEPQVihWDBtawsaft26Zlnwp4EsUCw4FqHDlJurnTHHVJNTdjTINoIFtzLypJatpSKisKeBNFGsOBeq1ZSfr50zz3SwYNhT4NoIlhoEsaOlfr1C8KFpotgOVHXQ/gjR0p33/2/63fvLj3xRBQHbATuv18qLJQqK8OeBNFCsBxpzofwzaQlS6SrrpKOHz/5Munp0pVXStXVJz5eVSWNGiWVlkZ9TEQZwXKkuR7CX7tWGj5cmjlTGjEiiJckvfrq/+5NLlsWfP+qqz5/rEWLIGZXXx0EbcuWWE2OhkawHGluh/B375amTZMuukjq21d6/33p5puDI4L10a6dNGeOVFER7KUOGCDNmiV9+mlUxkYUESxn6nIIf948qXPnE7/27IndjGfq6NHgvah+/YK9oXXrpMWLpZSUM3veHj2C53nxRelPf5J69Qq2U1vbMHMj+giWM3U5hD91qvTWWyd+ffWrsZvxTKxaFYTqgQek+fOlV16RLrigYbcxcqS0YYP04IPSz38uDR4cBAyNH8Fy6MsO4ScmBkcF//Orvi+jYm3zZmn0aOnaa6VIJHj5lpkZvP8UDXFxwfNv2SJdfrl02WXSmDHBe4RovAiWU03lEP6BA8H7SeefH4T2vfekvDypbdvYbL9jxyD8mzYFb9anpgbzeP+9NlUEy6lTHcL3oqamRo88sk+9e0vl5cFLsmXLpK99LZx5+vaVnnsuOAJbWir17y+tWPGJ7F+HJNEotDD+RJqlo0el1q3D2fYf/vAH5eTk6OyzL1Ak8ltFIsFLtLqtK/3610FYojX/0aPSQw9JixePV2LibhUUFGjIkCHR2RjqhT2sZio/X/r614OXP6+99vm5TdG0detWTZgwQWPHjtWll16qlSsf0aRJdYtVRYWUkRGcX5WWFt0je61bByfovvLKIxo8eLAuvPBCTZgwQTt37ozeRlE3hmbpk0/MiovNMjLMWrc269HDLDvbrKTErKamYbdVWVlpubm5lpCQYBkZGbZt27Z6rGuWm2uWkBDM+sEHDTtbXWzYsMEuvvhia9++veXm5tqhQ4diPwTMzIxgwf7+d7Nly8wiEbP27c06dQr+vaTE7OjR03/e2tpaKy4utpSUFEtNTbXS0tJ6rBsENSXFLDXVrB6rRk1JSYn17NnTev
ToYcXFxWGP0ywRrEaqqir4SxtrBw+aLV1qNn58EK8rrvg/mzJlij3//PN2+PDhOj9PeXm5paenW3JyshUUFFhNPXbbysvN0tPNkpPNCgoafo/vTFRXV1t+fr4lJibaJZdcYhs3bgx7pGaFYDVS11xjlpMT7gzV1Wa///3LNnHiREtKSrKkpCSbOHGirVy50qqrq0+6zs6dOy0SiVh8fLxlZ2fbxx9/XOft7dwZ7NnFxwcvT/fta6ifpOF99NFHFolErGXLlhaJRGzv3r1hj9QsEKxG6JFHzDp0MNuyJexJPnfs2DErKyuzmTNnWkpKirVt29YyMjKsuLjYPvvsM6uqqrL8/Hzr0KGDffe737W33367zs9dVVVls2fPtgsueNMuu8zs3Xej+IM0sDfeeMOGDx9uZ511luXn59uRI0fCHqlJI1iNzLvvmrVrZ/bEE2FPcmo1NTW2evVqmzZtmqWkpFibNm2sd+/e1qdPHyspKanz8xw/ftyefPJJ6969u/Xp08deeGFNFKeOntraWnvsscesS5cuNm7cODt6Jm/84QsRrEbk8GGz8883mzw57Enqrra21ubOnWvJycn12rtYv369XXTRRf8+8laf98caqz179pgk27x5c9ijNFmch9WI/PjHwc3mHnoo7EnqLi4uTmlpaYqPj1frOpzJuX//fs2aNUvp6enq2bOntm3bpry8PCUkJMRg2ujq0KFD2CM0ea3CHgCBF16QFi6U1qwJrqlrampqavTwww9r9uzZSk1NVVlZmYYNGxb2WPW2ceNGVVdXa/jw4WGP0iwRrEZg1y5p0iRp7tzgGsGmZtWqVbr55pt1+PBhzZ8/X5FIRC2idRuGKFuyZIk++ugjghUSXhKG7Pjx4DYngwYFd9NsaoqLi3XttdcqEomooqJCmZmZbmOF8LGHFbI5c6R33pE2bozevZ/CNGHCBH3nO99Rjx49wh4FTQDBCtEbb0j33SetXBl8XFdT1LZtW2KFBsNLwpB8+mmlIpHDmjUruK8VgC9HsEKSnX29+vW7QXPmhD0J4AcvCUOwcOFClZaWav369YqPD3sawA/2sGJs69atysnJ0YIFC9S3b9+wxwFcIVgxdOTIEU2YMEFXX321rrvuurDHAdwhWDH0k5/8RJWVlZo/f37YowAu8R5WjJSWlqqoqEhr1qzRV77ylbDHwWlKTOyu5OSTXzPZokVLpaYOVFxcmxhP1XwQrBjYtWuXMjMzNWfOHKU3xWtvmpHKypt04MDJv2fWVu+/v17Hj8d2puaEl4RRdvz4cU2aNEkDBw5UTk5O2OMArhGsKJs7d642bdqkRYsWKe4/Ps/q2LFjWrFiRYiTAf7wkjCK1q1bp5/97Gd65pln1LVr1xO+t2PHDk2ePFmHDh1SJBIJaULAF/awoigrK0s33nijMjIy/ud7vXr1UkFBgW688UZt2bIlhOkAfwhWiLKysjRmzBhNnDhRR48eDXscoNEjWFH0m9/8Rg8//LCee+65Uy6zYMECHThwQHfffXcMJwN8IlhRNGTIEM2ePVvXX3+99uzZc9JlkpKStHTpUhUWFur555+P8YSALwQryu644w4NGDBAkydP1vFTnKAzdOhQ/fSnP9WUKVP0t7/9LcYTAn4QrCiLi4tTcXGxNmzYoHnz5p1yuTvvvPPfYTOzGE4I+EGwYqBbt25avHix7rrrLq1du/aky/wrbOvXr//CsCFcbdtK7dqFPUXzRbBiZPTo0Zo2bZomTpyogwcPnnSZbt266bHHHlNpabrWr4/xgKiTe++VHn887CmaL4IVQ/fff78SExM1Y8aMUy7z/e9/X/36XaQf/EA6RdeAZotgxVBCQoKWLVumlStX6oknnjjlcg88EHyY6syZMRwOZ6xVK+nSS6X27cOepOkiWDHWp08fzZs3T9OnT1dFRcVJl0lIkJ58Ulq+XPqCrqGRadNGevFFiQ8Jih6CFYKpU6dq9OjRuvXWRaqpOfky/ftLDz4oTZ
8unaJrQLNDsELy6KOPafPmObrrrlMvk50tZWRIEydKXLkDEKzQdOyYqCVLWqiwUPqiE9wXLJD275dmz47dbEBjRbBCNHSodPfd0pQp0qlOcO/YUXr6aamgQFq9OpbTAY0PwQrZXXdJaWnS5MnSqU5wHzo0WG7SpGBvy5sZM2bo9ttv1z/+8Y+wR4mKkSOlFi2k118/8fEuXaRnnw1joqaLYIUsLk5avFhavz54k/1U7rxTmjNHOuus2M3WUEaNGqUVK1bovPPO09NPP90kLz3q1Em65Zawp2j6CFYj0K2bVFws3XGHdIord9SypfSjHwX/9Gbs2LHavHmzcnJylJ2drWHDhqm8vDzssRpUVpa0fbv0zDNhT9K0EaxG4oorpKlTpeuukyorw56m4cXHx2vWrFn64IMPNGzYMH37299WZmZmk7k7RYcOUm5u8D+dU52qgjNHsBqRX/0qOEva0xnutbW1euedd1RTU1Onu6Z27txZhYWFWrt2rbZv367evXsrLy9PR44cicG00ZWVFewBFxWFPUnTRbAakYQE6be/lZYtC850b6yOHTum1atX64YbblC3bt10zz33KDk5WWlpaVq1alWdnmPgwIEqKyvTU089pUWLFiktLU3Lly+P8uTR1aqVlJ8v3XMP14FGC8FqZPr3l+bNk264oXGd4V5bW6vXXntNs2bNUvfu3TV27Fjt2rVLv/zlL7V3715t3LhR119/vX74wx9q1KhR2rRpU52ed8yYMXrvvfeUlZWlKVOm1GvdxmjsWKlfvyBciAJDo3TNNWY5OeHOcOjQISspKbFIJGJJSUnWrl07y8jIsOLiYqusrDzpOjt37rRIJGLx8fGWnZ1t+/btq/P2zmTdMI0YYXbvvZ//d3m5Wbt2wdfvfhfWVE0TwWqkqqrMamvD2G7VvyOVmJhoycnJFolErKSkxA4fPlzn5ykvL7dhw4ZZcnKyFRQUWE1NTb3WTU9PP611o6262mzJkhMf++9gmZmNH28mEayGRrBgBw6YLVpklp0929q0aWNdunSx6dOn20svvXRGsaitrbXi4mJLSUmx1NRUe+GFF0573dLS0tOeoyEcP262dKnZOeeY9epl9vHHoY7TbBGsZmrfPrNHHzW7/HKz+HizHj3M7r23zNasWWO1DbxrV1lZabm5uZaQkGAZGRm2bdu2mKzbUDZsMLv4YrP27c1yc80OHYr5CPgngtWMfPyxWXGxWUZGEKmvfc1s5kyzsrJgDyLaKioqbPz48da6dWubOXOmffbZZzFZ93Tt3x/8flq1MotEzPbsifom8SUIlhMjRgTvifz5zyc+npLyxe+T7NhhVlBg9q1vmcXFmX3jG7GN1Mm89NJLlpaWZl27drWioqJ67dGdybp1dfRo8DtLSjIbMsTs9dcbfBM4TQTLiREjzDp1Mhs+/MTHTxas7ds/j1SLFmb9+wcvZd58Mzaz1kVNTY0VFRVZ586dbdCgQVZWVhaTdb/M6tXB7+vss4O90bCijpMjWE6MGGF2221mXbqYrVjx+eP/Haz77gsiNXSo2S9+YRbCWz718sknn9jMmTMtPj7exo8fbx9++GFM1v1vW7aYXXmlWdu2we/54MHTfipEESeOOlKX69UyM6UPPwwuor71VqlXr5iOWG/JyckqLCzU22+/rcrKSvXv3195eXk6dOhQndd98803tX//fg0dOlSHDx+u1/Y//VS6/XZpwIDgFjHvvhuc9JmYeJo/EKKKYDnzZder9eghnXNObGdqCKmpqSotLdXSpUu1ZMkS9e3bV4sXL67TrWi++c1v6uWXX9aaNWvUpk2bOm2vtrZWCxYs0LXXLtEf/xjcHHHVKqlnzzP9SRBNBMuZpn692pgxY7R582bdcsstmjFjhi655BK99dZbdVr33HPPrdNyL7/8sgYOHKi8vDyNG3dE69dLF198BkMjZgiWQ039erXWrVtr1qxZev/993XuuedqyJAhyszM1N69e8/oeXfu3K
nMzEx973vf08iRI1VRUaGpU7MUx98CN/ijcur++6XCwqZ576x/6dq1q4qKilReXq6//vWvp30rmqqqKuXl5alv377as2eP/vKXv6iwsFBJSUlRmhzRQrCcSk+XrrxSqq4Oe5LoGzRokMrKyvT4449r0aJFGjBgQJ1uRWNmWr58ufr376+nnnpKy5cv1+rVq3XeeefFYGpERdiHKYH6qKqqstzcXGvTpo2NGjXKNm3adNLl1q1bZxdeeKF17NjR8vPz63XhNhov9rDgSrt27ZSXl6etW7eqa9euGjhwoKZNm6b9//w4od27d2vatGkaPny4evXqpS1btui2225TQkJCyJOjIbQwa4IfYYJm49VXX9VNN92k3bt3KyMjQ8uWLdPQoUP14IMP6vzzzw97PDQwggX3amtrtXDhQu3YsUODBw/WuHHjwh4JUUKwALjBe1gA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANz4f8zphzx5iAzOAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52DB38>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.SanitizeMol(ms)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## With standardization"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cn1cnc2c(=NO)[nH]cnc21\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE5155C0>"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m_std = standardize(m)\n",
"print(Chem.MolToSmiles(m_std))\n",
"Draw.MolToImage(m_std)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N=c1[nH]cnc2[nH]cnc12\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE515828>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ms = MurckoScaffold.GetScaffoldForMol(mol=m_std)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All rules"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[N]=c1[nH]cnc2[nH]cnc12\n",
"UnicodeEncodeError!\n"
]
}
],
"source": [
"ms_all = Chem.Mol(ms)\n",
"Chem.SanitizeMol(ms_all)\n",
"print(Chem.MolToSmiles(ms_all))\n",
"try:\n",
" Draw.MolToImage(ms_all)\n",
"except UnicodeEncodeError:\n",
" print(\"UnicodeEncodeError!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All except FindRadicals"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N=c1[nH]cnc2[nH]cnc12\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52DD30>"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"options = Chem.SANITIZE_ALL ^ Chem.SANITIZE_FINDRADICALS\n",
"ms_norad = Chem.Mol(ms)\n",
"Chem.SanitizeMol(ms_norad, options)\n",
"print(Chem.MolToSmiles(ms_norad))\n",
"Draw.MolToImage(ms_norad)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment