Skip to content

Instantly share code, notes, and snippets.

@jose-manuel
Last active August 28, 2019 16:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jose-manuel/04d69dd3ac52cca74449e73d614df42e to your computer and use it in GitHub Desktop.
Save jose-manuel/04d69dd3ac52cca74449e73d614df42e to your computer and use it in GitHub Desktop.
Possible inconsistency between the SMILES and the drawing of a molecule after Murcko scaffold extraction
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Inconsistency between a molecule's SMILES and its drawing"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import rdkit\n",
"from rdkit import Chem\n",
"from rdkit.Chem.MolStandardize.metal import MetalDisconnector\n",
"from rdkit.Chem.MolStandardize.charge import Uncharger\n",
"from rdkit.Chem.MolStandardize.normalize import Normalizer\n",
"from rdkit.Chem.MolStandardize.tautomer import TautomerCanonicalizer\n",
"from rdkit.Chem import rdmolops\n",
"from rdkit.Chem.Scaffolds import MurckoScaffold\n",
"from rdkit.Chem import Draw"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Init"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def standardize(mol):\n",
"    \"\"\"Run a molecule through the notebook's standardization pipeline.\n",
"\n",
"    Steps, in order: disconnect metals, clear isotope labels, normalize\n",
"    functional groups, remove charges, pick the canonical tautomer and\n",
"    strip stereochemistry.\n",
"\n",
"    The helper objects ``metal_disconnector``, ``normalizer``, ``uncharger``\n",
"    and ``canonicalizer`` are module-level names looked up at call time, so\n",
"    the cell that creates them must have been run before this is called.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    mol : RDKit molecule\n",
"        Molecule to standardize.\n",
"\n",
"    Returns\n",
"    -------\n",
"    RDKit molecule\n",
"        The molecule after all steps have been applied.\n",
"    \"\"\"\n",
"    # NOTE(review): whether disconnect() copies its input or edits it in\n",
"    # place is not visible here - confirm before reusing the caller's mol.\n",
"    # disconnect metals\n",
"    mol = metal_disconnector.disconnect(mol)\n",
"    # remove isotopes (0 clears the isotope label on the atom)\n",
"    for atom in mol.GetAtoms():\n",
"        atom.SetIsotope(0)\n",
"    # normalize functional groups\n",
"    mol = normalizer.normalize(mol)\n",
"    # remove charges\n",
"    mol = uncharger.uncharge(mol)\n",
"    # enumerate canonical tautomer\n",
"    mol = canonicalizer.canonicalize(mol)\n",
"    # remove stereo (modifies mol in place; the call's result is not used)\n",
"    rdmolops.RemoveStereochemistry(mol)\n",
"\n",
"    return mol"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# One instance per standardization step; standardize() reads these four\n",
"# module-level names when it is called.\n",
"metal_disconnector, normalizer, uncharger, canonicalizer = (\n",
"    MetalDisconnector(),\n",
"    Normalizer(),\n",
"    Uncharger(),\n",
"    TautomerCanonicalizer(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cn1cnc2c(NO)ncnc21\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAcRElEQVR4nO3deVRU993H8c8IigsKKjHgxvK4kipYcIkoEApGFKLWGJeqaYwhelJNjk0PmqUkaZ8cejx5RNPEkh2NjVqtFcZKAqg4ijbKE2MCQROHSiJK1EdRRFm/zx+3uAJhmZk7v5nP6xwOdGaY+Xpi3965c+/vGkREQESkgA56D0BE1FIMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCzS3Y0bN5CdnY2VK1diyJAhiI+PR1FRkd5jkR1isEgXpaWlePfddzF9+nR4eXlh5syZMJvNWLVqFTw9PREREYFjx47pPSbZGYOIiN5DkHMoKCiA0WhERkYG8vLy4O/vj+joaMTFxeHhhx9Gp06dAAAighUrVuCjjz7Crl27MH78eJ0nJ3vhqvcA5LgqKyuRk5MDo9GIXbt24dy5cxg3bhzi4+Oxdu1ahISENPp7BoMBa9asgYeHByZNmoQdO3YgJibGxtOTPeIWFlmU2Qzs2gXk5CxAZubf0L17d8TGxt7civLw8GjV8/3pT39CUlISPvnkE8yYMcNKU5MqGCxql7o6IC8PMBq1r8JCYORIYN48IyZO7IWxY8fCxcWlXa+xfv16PPvss3jvvfewcOFCC01OKuJbQicUGQnk5gIHDwK37x7y9gb+8hdg+vTmf//SJSA7W/vauRMoLwcmTAAWLgRmzACGDAGAOIvNu3TpUvTo0QNPPvkkqqursXjxYos9N6mFwXJSvXsDzz+vbR21hNkMZGRoW1G5uYCnJzB5MvDmm8DDDwM9erRvnqNHjyI0NLTJ+3/1q1/B3d0dc+bMwZUrV7BixYr2vSApiYc1OKnFi4HiYmD79qYfk5UFPPssMGiQ9rVpEzBxInDoEFBWBmzYAMya1f5YlZeXY/LkyXjmmWdQX1/f5OOmTZuGv//973jppZewcuXK9r0oKYlbWE7K3R1ISgJWrQIeeQTo2PHex6xcCfTqBSxbBjz6KNCvn3Vm8fDwQFZWFh5++GFcvnwZaWlpcHVt/K9mbGwsMjMzER8fj8rKSqxduxYGg8E6g5Hd4RaWE1u8GHBxAVJTG78/P//WVpa1YtVg1KhR2L9/P/bv34+ZM2fixo0bTT42PDwcWVlZOHLkBJ577jqa2SgjB8NgOTFXVyA5GXj1VeDKFb2nAYYNG4YDBw6gsLAQU6dORUVFRZOPHTNmDN5//1Ns29YV8+cDNTU2HJR0w2A5uWnTgOHDtXDZA19fX5hMJpw/fx5TpkzBlWZKGhgIHDgAHD4MzJwJNLNRRg6CwSKsXg2sXQtcvar3JBpvb2/s27cPVVVViIqKwoULF5p8rL8/sG8fUFQETJkCNLNRRg6AwSKMHQtMnQpUVuo9yS29evXCZ599hs6dOyM8PBylpaVNPnbgQMBkAi
5eBGJjtePCyDHxSHeya9euXcOMGTNw8WI5duw4jIEDm/5E8NIlbSuruhrIzATuu8+Gg5JNcAuL7Fq3bt2QkZGB0NAPMXGiAd9+2/Rje/bUPtXs2ROIiADOnLHdnGQbDBbZPTc3N7z9diCiorQDV7/8sunHursD6emAr692utCpU7abk6yPwSIluLgAH3wAzJ4NPPSQ9slgU7p21c5xDA3VAldQYLs5yboYLFKGwQCkpABPPglERwM5OU0/tlMnYPNm7XzHqCiAi5c6BgaLlGIwaIdhvPyy9snmzp1NP9bFBXjvPe20otxc281I1sNPCZ3QyZPaag29e+s9Sfu89Rbw298CH3+sRYkcH7ewnNCyZcC6dXpP0X7PPAO8+y4wf762f6ulIiO1LbW7l9bx9gb+8Q9LTkiWxmA5mdpabXmYiRP1nsQyFiwAPvwQWLoU2Lu35b/XsB4YqYXBcjLHjgHXr2tHtzuKuXO11VMjIlr+Oy1ZD4zsD4PlZEwm4Oc/B7p313sSywoNBTq04m/z7euBcaUHdTBYTsZkavrtYG2tdrDld9/Zdia9/NR6YGR/GCwnIqK9dWoqWP/7v8DnnwM+PradSy/2th4Y/TQGy4kUFQHnz995pZzbmUxASAjQrZtt59KTva0HRs3jmu5OxGTSFr1rahWD5t4uOrLVq7Wj4cn+cQvLiTQXJBHtuCRnDJY9rgdGjeOR7k7Ezw94/XVg3rx77ysoAEaM0N4yqn4EPDkubmE5iR9+AE6f1j4FbIzJBPzsZ4wV2TcGy0nk5mpbWAMHNn6/s+6/IrUwWE7ip4J04ACDRfaPwXISJ09OxaRJ+xq979//BkpKgLAwm45E1GoMlhO4cOEC9u3bjdGjGz8i9NChKxg0qB4DBth4MKJWYrCcgMlkgpeXF4YMGdLo/Xv3Po+IiCU2nko/x49rK5byEvfqYbCcgMlkQnh4OAyGxi+RZTKZ8OCDDrR8w0/Yuxe4fLl1J0uTfeB/MidgMpkwsYk96ufPn8eJEyeavN8RmUxAeLjeU1BbMFgOrqKiAseOHWsySCaTCX369Gny7aIjctYj+h0Bg+Xg8vLy0LVrVwQFBTV6f3NbX47o5Eng3Dl+IqoqBsvBmUwmjB8/Hi4uLo3ev3//fqcK1v79wNChQJ8+ek9CbcFgObjmtqAqKipw/PhxhDvRDh3uv1Ibg+XAqqurceTIkSaDdfDgQXTt2hUjRoyw8WT64SlIamOwHNiRI0dQV1eH0aNHN3q/yWRCWFhYk28XHc2ZM9qFJxgsdTFYDmz//v0YM2YMOnfu3OT9zrb/qn9/wNdX70morRgsB9ZwwGhTfH19ER0dbcOJ9GUyte5SYGR/GCwHVFVVhTVr1qCkpAR1dXVNPm7jxo1Nvl10RNx/pT6HClZz/+d0BvX19di0aROGDRuGNWvWICoqCmvWrEFKSoreo+nu//4PKCxksFTnUMF6+eWXMWHCBGRkZOg9is1lZ2dj9OjRWLJkCR5//HGcOHEC69atw44dO/DCCy9g5cqVeo+oq88//xYREWUYPlzvSag9HCpYCQkJCAkJwezZszFq1Chs2LDB4be6CgoK8Nhjj2Hq1KkIDQ3FqVOn8Morr6BLly4AgNjYWGRmZmL9+vVYvnw5nHUJ/5ycd+DhsQRNnP9NqhAHVFZWJklJSeLp6SkPPPCApKWlSU1Njd5jWVRJSYkkJCSIq6urzJo1S06dOtXs448ePSpeXl7y9NNPS11dnY2mtB9jx46VN954Q+8xqJ0cMlgNysvLJSUlRby9vcXPz09SUlKksrJS77Ha5eLFi5KYmCidO3eW6Ohoyc/Pb/HvFhQUSN++fWXu3LlSXV1txSnty7Vr16RTp07y+eef6z0KtZNDB6tBRUWFpKSkSP/+/aVPnz6SlJQkly9f1nusVqmqqpKUlJSbW40ZGRlteh6z2SwBAQESFxcn169ft/CU9ik7O1vc3d2dKtKOyimC1aCqqkrS0tJk6NCh0q
NHD0lMTJQLFy7oPVaz6upE0tKqJSBgmPj5+cnHH3/c7rd0p0+flsGDB8tDDz0kV69etdCk9ispKUliYmL0HoMswKmC1aC2tlY2bdokI0aMEHd3d3nttc1y5ozeU93rn/8UGTFCpHdvkXfe+Vxu3Lhhsec+d+6cjBw5UiZMmKDc1mZrRUVFyWuvvab3GGQBDvUpYUu5uLhg3rx5+PLLL7Fp0yYUFoYjIABYsgQwm/WeDjh6FPjFL4Dp04GHHgK++w546qnRcHNzs9hr3H///di3bx9qa2sRFRWF8+fPW+y5bamoqAirV69GSUlJo/fX1NTg8OHDTnUKkiNzymA1MBgMeOSRR/DJJz7IztZOjh0yBIiPB/LzbT9PSQnw9NPAgw9qV2AuKgLWrgU8Pa3zej179kRWVhZ69uyJiIgInDlzxjovZEHV1dXIzs7Gc889h0GDBiEwMBBbt27F2bNnG318fn4+amtrMWbMGBtPStbg1MG63YQJQEaGtnXTsycwbpwWrkOHrP/aFy8CK1dqsTSbgSNHgK1bAX9/67+2u7s70tPT4evriwkTJuDUqVPWf9FWunDhAv72t79h4cKF6NOnD6ZNm4aCggIsW7YM33//PY4cOYKxYxu/iIbJZEJoaCi6du1q46nJKvR+T2qvvv1WJCFBxNVVJCxMJD3d8q9x7ZpIcrKIh4dIaKjInj2Wf42WqqqqkkcffVR8fHzkq6++0m+Q//j6668lOTlZoqOjxdXVVfz8/CQhIUHS09NbtS8vPj5eEhMTrTgp2RKD9ROKi0WWLxfp0kUkOFgkLU2ktrZ9z1lXJ7J1q4ivr8jAgSKpqdptequtrZVFixZJnz595IsvvrDpa1dWVkpWVpYsX75cBgwYIC4uLhISEiJJSUly9OhRqa+vb/Vz1tfXS69evcRoNFphYtIDg9VCZWUiSUkinp4iDzyghev2g+cjIkQAkYMH7/y9++8X2bHj1v/et08kMFDkvvtE1q0TqaqywfCtUF9fL88995x4enrKwbv/MBZWVlYmaWlpMmvWLHF3d5du3bpJXFycpKamytmzZ9v9/MePH5cOHTrIpUuX2j8s2QUGq5UuXhR59VXtUINXXrl1e0SEdtuDD975+LuDtXevyIsvipSX22DYdkhKSpJu3brJZ599ZrHnrKurk6NHj0pycrKEhYWJwWCQgIAAWb58uWRlZUmVBet9+vRpWbBggQwbNsxiz0n6Y7Da6OpVkdv/4Y6IEElMFPH2Ftm27dbtdwdLJcnJyeLm5iY72vEHuHbtmqSnp0tCQoL07dtXXFxcJCwsTJKTk6WwsNBis9bV1UleXp688MILEhQUJABk0KBB0qlTJ1m7dq3FXof0xWBZSESEyB/+ILJ+vcjgwSINZ4GoHCwRkbfffls6duwoGzZsaPHvmM1mSU1Nlbi4OHFzcxMvLy+ZNWuWpKWlWfQg1YqKipsx9PHxEVdX15sx/Oabb0REZNeuXdKlSxfueHcQDJaFNASrpkZk2DCRN9/Ublc9WCIiGzduFDc3N3nvvfd+8rFnzpwRg8EgwcHB8uKLL8rhw4ctujrEqVOnbsawU6dOLYrhvn37pHv37oyWA3DV+7AKR+PqCiQnA4sXAwsX6j2NZcyfPx/du3fHnDlzUF5ejhUrVjT52L59+6K0tBTe3t4Wee26ujocOnQIRqMR2dnZyM/PR2BgIOLj45GYmIjx48ejQ4fmDyeMiIjAnj17MHnyZJSXl+Ott976yd8h+8RgWcG0acAbb2jhchTTpk3D9u3b8eijj+LHH39EcjN/uPbG6uLFi9izZw8yMjKQkZGBqqoqhIWFYcGCBdixYwcGDBjQ6ucMDQ1Fbm4uYmJicOXKFaSlpcHVlX/9VcP/YlayejUQFaX3FJY1ZcoUZGZmIj4+HiKC5ORkGCy0hKfZbEZGRgaMRiNyc3PRq1cvTJo0Ce+88w5iY2Ph7u7e7td44IEHsHfvXsTExGDmzJnYsmVLk5dAI/
vE7WIrGTsWmDoVqKzUexLLCg8PR05ODt5//30sXboU9fX1bXqeGzduIDs7GytXrsTQoUMxePBgbNy4EWFhYTh06BDOnj2LDRs2YNasWRaJVYOhQ4fCZDKhsLAQv/zlL3H9+nWLPTdZn0HESRf5pnYpLCxETEwMIiMjW/z26vz589i9ezeMRiM+/fTTmytFxMfHIz4+Hj4+PjaYXHPu3DlMmjQJnp6eMBqN6NGjh81em9qOwaI2O3HiBGJiYhAcHIytW7c2+vaqoKAARqMRGRkZyMvLg5+fH2JiYhAXF4dJkyZZdMmc1rp06RJiY2NRW1uLzMxMeHl56TYLtQyDRe1SUlKC6Oho9O/fH+np6ejQoQPy8vKQkZGB7du349y5cxg3bhzi4+MRHR2NkJAQvUe+Q3l5OeLi4lBd3RE7d+bA25uX1bFrOh5SQQ6itLRUhg8fLqNGjRI3Nzfp3bu3zJ8/XzZv3qzEeXwVFRWyaNEZGTxY5PRpvaexkJwckchIEXd37cz98eNF7r4OQESEdp7Y3fr1E9m40SZjthZ3ulO7+fj4ICYmBnV1dcjJyUFZWRk2btyI2bNnw9Naqw9aULdu3bB+fV+MHKldGfrkSb0naqft24HYWGDSJG0VyH//G5g7F3jsMeCDD/Serl14WAO1W3l5OdLS0vDBBx8gLCxM73HapFMnYMsW7YDf8HDg00+BoCC9p2qDujrg2WeBZcuAVatu3f6b3wDl5cBvfwvMmQMouqAht7CsKC0N+MMf9J7C+tatWwcfHx9Mnz5d71HaxcVF2wCZPVtbS//wYb0naoOvv9bW+l6w4N775s8HLl8G/vUvm49lKQyWFVVUAP/4h95TWFdlZSXefPNNrFq1yiFOdzEYgJQU4MkngehoICdH74la6cIF7Xu/fvfe17ev9v3HH2/d9j//A3h53fnVxPr49oBvCa0oOBgoKABqaoCOHfWexjpSU1PRpUsXzJ07V+9RLMZg0M5U8PLSDv7dskU73UoJDYdmnDlz6+cGpaXa9/vuu3XbU08Bv/vdnY8bPdp687WT+v8k2rGgIC1WRUV6T2IdNTU1SElJQWJiIjo6YJETE7VzQmfPBrZt03uaZtx+OsXPfqZtXW3ceO/jPv5YuwTTuHG3buveHejf/84vFxerj9xW3MKyInd3ICAA+PJLYMQIvaexvLS0NFRVVeGJJ57QexSreeYZoEcPbffPlSvAokV6T/QfBQWA0QhkZ2s///AD0KGDFpu1a4F587RrxT3+uLZ5v3Ur8N//Dfz5z8rucAe4hWV1wcFasBxNXV0dVq9ejRUrVqBLly56j2NVCxYAH30ELF2q7d/SQ2UlIBlG7Wq/AwZom+9Go7ajLStLi1WDmTOBf/5T+6hz8GDA1xfYtEl7b2s3xW0bHuluZX/8I5Cbq/2dciSbN2/GkiVLcPr0aXh4eOg9jk2kp2tvD999V9visrYffwQyM7Uu7d4NnB32ENwHeQNxccDkydoWlJNhsKwsIwN44olbH944ipCQEEydOhWvvfaa3qPY1MGD2tt7a50rfeyYtr9s1y7t52HDtD5Nnapd7NfZl/DiW0IrCw7WruyswFXgW8xoNKKoqAjLli3TexSbCwu7M1aRkdqninl5dz7O2/vOQ1oiI4GXXrr3+fr31/aFN9i6VTtM6te/Br77DvjmG+0Ty8hIxgrgTnerGzBA+3T52LHGD41R0dtvv4+EhATcd/vH406sd2/g+efvjVZbvP56+5/DkXELywZGjnScHe+5ucCBA5vx/POv6D2K3Vi8GCgu1k7hI+tisGzAkT4pfP11YM4cN/Tr5xw72lvC3R1IStJO3aup0Xsax8Zg2UBQkPaWUHVffKGdqnL3gdGkbWW5uACpqU0/RrGzYOwSg2UDQUFARcUZVFRU6z1Ku/zxj9oKJYMH6z2J/Wm4vNurr2oHmDbmqae0f7hu/+rTx3YzOgIGywYCA2tw4UIAvvoqX+9R2qyoCNi5k1tXzZ
k2DRg+vOnLuyl2FoxdYrBsoGPHjggMDMSXCu/ISk4GpkwBRo3SexL7tnq1dmbM1at6T+KYGCwbCQ4OVjZY338PfPKJdjIwNc9RL+9mL3iku42kpKRgy5YtOHTokN6jtNpvfqOdX7t3r96TkLPjFpaNBAUF4auvvmrzhUf1UlamrcJ5+2q7RHphsGxk1KhRqKysxLfffnvPfVea+ljJDqxZo53PFhOj9yREPDXHZjw9PTFgwAAcO3YMQ4cOveO+4OBglJaWol+/fggICLjna8iQIejevbvNZy4vB/7yF20Ly8DL9ZEdYLBsqGHH++zZs++4fe/evTCbzSguLr75fefOnTCbzSgrKwMAeHt7IyAgAP7+/ggKmgEvr5nw9wf8/a338fiHH2rLgCt+bQlyINzpbkO///3vkZ+fj127drX4dyorK++JWW1tJHJzp6O4GLh2TbtE1cCB2uqmDRG7/ee2LptUV6ctZOnr27bfJ7I0BsuGtm/fjuXLl+OMBdeaKSvTTrwtLgbM5jt//uEHoLYW8PAA9uwBfv5zi70skS4YLBs6deoUBg0ahLKyMvSxwTkZNTVASYkWsHHjtJN0iVTGYNmQiMDT0xPbtm1DDD92I2o1HtZgQwaDASNHjkR+vrrnFBLpiVtYNvbXv/4VixYtQteuXRs9hCEgIAADBw6Eqw3Ww42M1BbkO3gQGD/+1u3e3trhDPx0kOwND2uwsblz5yI4OPjmJ37FxcUoKirC7t27YTabUVFRgY4dO2LgwIHw9/eHv7//zcMZtJ//C15elrtaiiWX9yWyNgbLxgwGAwIDAxEYGNjo/ZcuXYLZbL7jKycnB6WlpSguLsaIES/g+PGX0LevdujC3V9Dh7Zu5/rixUBamra878yZFvpDElkJ3xIqpLa2Fj/8UAGz2fOeQxiKi7Xr2AGAj8+t47Du/t6v361rbkZGatfh9PLSVsMsKNAuEsy3hGSvuIWlEFdXV/j5ecLPD4iKuvf+qirtcmJm862vb77RrnF38qS2RlNaGrBw4Z2/t3ixtoZTaqq2MgORvWKwHIib2623ho05dw7o2vXe2xuW9128+N6YEdkTHtbgRLy9m75i8U8t70tkD7iFRTetXt34W00ie8EtLLqJy/uSveOnhESkDG5hEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMpgsIhIGQwWESmDwSIiZTBYRKQMBouIlMFgEZEyGCwiUgaDRUTKYLCISBkMFhEpg8EiImUwWESkDAaLiJTBYBGRMhgsIlIGg0VEymCwiEgZDBYRKYPBIiJlMFhEpAwGi4iUwWARkTIYLCJSBoNFRMr4fwXZuAjdXolyAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE515400>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = Chem.MolFromSmiles('Cn1cnc2c(NO)ncnc21')\n",
"print(Chem.MolToSmiles(m))\n",
"Draw.MolToImage(m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Without standardization"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c1ncc2nc[nH]c2n1\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAUbUlEQVR4nO3de3BUhdnH8R+BEG5pMDANCNhSLgYM1eEWsFawRS0aiiNCneIGiiGIM4Cm1ntJrFLSajGRqRipDgGtCEhtUNOKr1pinSBCRVQkYJFBoCBSJU24hPC8f5y2lhY0geyePMn3M5NRN+fseRKGr2f3XLaFmZkAwIG4sAcAgLoiWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWHBr5EipRQvp9ddPfLxLF+nZZ8OYCNFGsOBap07SLbeEPQVihWDBtawsaft26Zlnwp4EsUCw4FqHDlJurnTHHVJNTdjTINoIFtzLypJatpSKisKeBNFGsOBeq1ZSfr50zz3SwYNhT4NoIlhoEsaOlfr1C8KFpotgOVHXQ/gjR0p33/2/63fvLj3xRBQHbATuv18qLJQqK8OeBNFCsBxpzofwzaQlS6SrrpKOHz/5Munp0pVXStXVJz5eVSWNGiWVlkZ9TEQZwXKkuR7CX7tWGj5cmjlTGjEiiJckvfrq/+5NLlsWfP+qqz5/rEWLIGZXXx0EbcuWWE2OhkawHGluh/B375amTZMuukjq21d6/33p5puDI4L10a6dNGeOVFER7KUOGCDNmiV9+mlUxkYUESxn6nIIf948qXPnE7/27IndjGfq6NHgvah+/YK9oXXrpMWLpZSUM3veHj2C53nxRelPf5J69Qq2U1vbMHMj+giWM3U5hD91qvTWWyd+ffWrsZvxTKxaFYTqgQek+fOlV16RLrigYbcxcqS0YYP04IPSz38uDR4cBAyNH8Fy6MsO4ScmBkcF//Orvi+jYm3zZmn0aOnaa6VIJHj5lpkZvP8UDXFxwfNv2SJdfrl02WXSmDHBe4RovAiWU03lEP6BA8H7SeefH4T2vfekvDypbdvYbL9jxyD8mzYFb9anpgbzeP+9NlUEy6lTHcL3oqamRo88sk+9e0vl5cFLsmXLpK99LZx5+vaVnnsuOAJbWir17y+tWPGJ7F+HJNEotDD+RJqlo0el1q3D2fYf/vAH5eTk6OyzL1Ak8ltFIsFLtLqtK/3610FYojX/0aPSQw9JixePV2LibhUUFGjIkCHR2RjqhT2sZio/X/r614OXP6+99vm5TdG0detWTZgwQWPHjtWll16qlSsf0aRJdYtVRYWUkRGcX5WWFt0je61bByfovvLKIxo8eLAuvPBCTZgwQTt37ozeRlE3hmbpk0/MiovNMjLMWrc269HDLDvbrKTErKamYbdVWVlpubm5lpCQYBkZGbZt27Z6rGuWm2uWkBDM+sEHDTtbXWzYsMEuvvhia9++veXm5tqhQ4diPwTMzIxgwf7+d7Nly8wiEbP27c06dQr+vaTE7OjR03/e2tpaKy4utpSUFEtNTbXS0tJ6rBsENSXFLDXVrB6rRk1JSYn17NnTev
ToYcXFxWGP0ywRrEaqqir4SxtrBw+aLV1qNn58EK8rrvg/mzJlij3//PN2+PDhOj9PeXm5paenW3JyshUUFFhNPXbbysvN0tPNkpPNCgoafo/vTFRXV1t+fr4lJibaJZdcYhs3bgx7pGaFYDVS11xjlpMT7gzV1Wa///3LNnHiREtKSrKkpCSbOHGirVy50qqrq0+6zs6dOy0SiVh8fLxlZ2fbxx9/XOft7dwZ7NnFxwcvT/fta6ifpOF99NFHFolErGXLlhaJRGzv3r1hj9QsEKxG6JFHzDp0MNuyJexJPnfs2DErKyuzmTNnWkpKirVt29YyMjKsuLjYPvvsM6uqqrL8/Hzr0KGDffe737W33367zs9dVVVls2fPtgsueNMuu8zs3Xej+IM0sDfeeMOGDx9uZ511luXn59uRI0fCHqlJI1iNzLvvmrVrZ/bEE2FPcmo1NTW2evVqmzZtmqWkpFibNm2sd+/e1qdPHyspKanz8xw/ftyefPJJ6969u/Xp08deeGFNFKeOntraWnvsscesS5cuNm7cODt6Jm/84QsRrEbk8GGz8883mzw57Enqrra21ubOnWvJycn12rtYv369XXTRRf8+8laf98caqz179pgk27x5c9ijNFmch9WI/PjHwc3mHnoo7EnqLi4uTmlpaYqPj1frOpzJuX//fs2aNUvp6enq2bOntm3bpry8PCUkJMRg2ujq0KFD2CM0ea3CHgCBF16QFi6U1qwJrqlrampqavTwww9r9uzZSk1NVVlZmYYNGxb2WPW2ceNGVVdXa/jw4WGP0iwRrEZg1y5p0iRp7tzgGsGmZtWqVbr55pt1+PBhzZ8/X5FIRC2idRuGKFuyZIk++ugjghUSXhKG7Pjx4DYngwYFd9NsaoqLi3XttdcqEomooqJCmZmZbmOF8LGHFbI5c6R33pE2bozevZ/CNGHCBH3nO99Rjx49wh4FTQDBCtEbb0j33SetXBl8XFdT1LZtW2KFBsNLwpB8+mmlIpHDmjUruK8VgC9HsEKSnX29+vW7QXPmhD0J4AcvCUOwcOFClZaWav369YqPD3sawA/2sGJs69atysnJ0YIFC9S3b9+wxwFcIVgxdOTIEU2YMEFXX321rrvuurDHAdwhWDH0k5/8RJWVlZo/f37YowAu8R5WjJSWlqqoqEhr1qzRV77ylbDHwWlKTOyu5OSTXzPZokVLpaYOVFxcmxhP1XwQrBjYtWuXMjMzNWfOHKU3xWtvmpHKypt04MDJv2fWVu+/v17Hj8d2puaEl4RRdvz4cU2aNEkDBw5UTk5O2OMArhGsKJs7d642bdqkRYsWKe4/Ps/q2LFjWrFiRYiTAf7wkjCK1q1bp5/97Gd65pln1LVr1xO+t2PHDk2ePFmHDh1SJBIJaULAF/awoigrK0s33nijMjIy/ud7vXr1UkFBgW688UZt2bIlhOkAfwhWiLKysjRmzBhNnDhRR48eDXscoNEjWFH0m9/8Rg8//LCee+65Uy6zYMECHThwQHfffXcMJwN8IlhRNGTIEM2ePVvXX3+99uzZc9JlkpKStHTpUhUWFur555+P8YSALwQryu644w4NGDBAkydP1vFTnKAzdOhQ/fSnP9WUKVP0t7/9LcYTAn4QrCiLi4tTcXGxNmzYoHnz5p1yuTvvvPPfYTOzGE4I+EGwYqBbt25avHix7rrrLq1du/aky/wrbOvXr//CsCFcbdtK7dqFPUXzRbBiZPTo0Zo2bZomTpyogwcPnnSZbt266bHHHlNpabrWr4/xgKiTe++VHn887CmaL4IVQ/fff78SExM1Y8aMUy7z/e9/X/36XaQf/EA6RdeAZotgxVBCQoKWLVumlStX6oknnjjlcg88EHyY6syZMRwOZ6xVK+nSS6X27cOepOkiWDHWp08fzZs3T9OnT1dFRcVJl0lIkJ58Ulq+XPqCrqGRadNGevFFiQ8Jih6CFYKpU6dq9OjRuvXWRaqpOfky/ftLDz4oTZ
8unaJrQLNDsELy6KOPafPmObrrrlMvk50tZWRIEydKXLkDEKzQdOyYqCVLWqiwUPqiE9wXLJD275dmz47dbEBjRbBCNHSodPfd0pQp0qlOcO/YUXr6aamgQFq9OpbTAY0PwQrZXXdJaWnS5MnSqU5wHzo0WG7SpGBvy5sZM2bo9ttv1z/+8Y+wR4mKkSOlFi2k118/8fEuXaRnnw1joqaLYIUsLk5avFhavz54k/1U7rxTmjNHOuus2M3WUEaNGqUVK1bovPPO09NPP90kLz3q1Em65Zawp2j6CFYj0K2bVFws3XGHdIord9SypfSjHwX/9Gbs2LHavHmzcnJylJ2drWHDhqm8vDzssRpUVpa0fbv0zDNhT9K0EaxG4oorpKlTpeuukyorw56m4cXHx2vWrFn64IMPNGzYMH37299WZmZmk7k7RYcOUm5u8D+dU52qgjNHsBqRX/0qOEva0xnutbW1euedd1RTU1Onu6Z27txZhYWFWrt2rbZv367evXsrLy9PR44cicG00ZWVFewBFxWFPUnTRbAakYQE6be/lZYtC850b6yOHTum1atX64YbblC3bt10zz33KDk5WWlpaVq1alWdnmPgwIEqKyvTU089pUWLFiktLU3Lly+P8uTR1aqVlJ8v3XMP14FGC8FqZPr3l+bNk264oXGd4V5bW6vXXntNs2bNUvfu3TV27Fjt2rVLv/zlL7V3715t3LhR119/vX74wx9q1KhR2rRpU52ed8yYMXrvvfeUlZWlKVOm1GvdxmjsWKlfvyBciAJDo3TNNWY5OeHOcOjQISspKbFIJGJJSUnWrl07y8jIsOLiYqusrDzpOjt37rRIJGLx8fGWnZ1t+/btq/P2zmTdMI0YYXbvvZ//d3m5Wbt2wdfvfhfWVE0TwWqkqqrMamvD2G7VvyOVmJhoycnJFolErKSkxA4fPlzn5ykvL7dhw4ZZcnKyFRQUWE1NTb3WTU9PP611o6262mzJkhMf++9gmZmNH28mEayGRrBgBw6YLVpklp0929q0aWNdunSx6dOn20svvXRGsaitrbXi4mJLSUmx1NRUe+GFF0573dLS0tOeoyEcP262dKnZOeeY9epl9vHHoY7TbBGsZmrfPrNHHzW7/HKz+HizHj3M7r23zNasWWO1DbxrV1lZabm5uZaQkGAZGRm2bdu2mKzbUDZsMLv4YrP27c1yc80OHYr5CPgngtWMfPyxWXGxWUZGEKmvfc1s5kyzsrJgDyLaKioqbPz48da6dWubOXOmffbZZzFZ93Tt3x/8flq1MotEzPbsifom8SUIlhMjRgTvifz5zyc+npLyxe+T7NhhVlBg9q1vmcXFmX3jG7GN1Mm89NJLlpaWZl27drWioqJ67dGdybp1dfRo8DtLSjIbMsTs9dcbfBM4TQTLiREjzDp1Mhs+/MTHTxas7ds/j1SLFmb9+wcvZd58Mzaz1kVNTY0VFRVZ586dbdCgQVZWVhaTdb/M6tXB7+vss4O90bCijpMjWE6MGGF2221mXbqYrVjx+eP/Haz77gsiNXSo2S9+YRbCWz718sknn9jMmTMtPj7exo8fbx9++GFM1v1vW7aYXXmlWdu2we/54MHTfipEESeOOlKX69UyM6UPPwwuor71VqlXr5iOWG/JyckqLCzU22+/rcrKSvXv3195eXk6dOhQndd98803tX//fg0dOlSHDx+u1/Y//VS6/XZpwIDgFjHvvhuc9JmYeJo/EKKKYDnzZder9eghnXNObGdqCKmpqSotLdXSpUu1ZMkS9e3bV4sXL67TrWi++c1v6uWXX9aaNWvUpk2bOm2vtrZWCxYs0LXXLtEf/xjcHHHVKqlnzzP9SRBNBMuZpn692pgxY7R582bdcsstmjFjhi655BK99dZbdVr33HPPrdNyL7/8sgYOHKi8vDyNG3dE69dLF198BkMjZgiWQ039erXWrVtr1qxZev/993XuuedqyJAhyszM1N69e8/oeXfu3K
nMzEx973vf08iRI1VRUaGpU7MUx98CN/ijcur++6XCwqZ576x/6dq1q4qKilReXq6//vWvp30rmqqqKuXl5alv377as2eP/vKXv6iwsFBJSUlRmhzRQrCcSk+XrrxSqq4Oe5LoGzRokMrKyvT4449r0aJFGjBgQJ1uRWNmWr58ufr376+nnnpKy5cv1+rVq3XeeefFYGpERdiHKYH6qKqqstzcXGvTpo2NGjXKNm3adNLl1q1bZxdeeKF17NjR8vPz63XhNhov9rDgSrt27ZSXl6etW7eqa9euGjhwoKZNm6b9//w4od27d2vatGkaPny4evXqpS1btui2225TQkJCyJOjIbQwa4IfYYJm49VXX9VNN92k3bt3KyMjQ8uWLdPQoUP14IMP6vzzzw97PDQwggX3amtrtXDhQu3YsUODBw/WuHHjwh4JUUKwALjBe1gA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANz4f8zphzx5iAzOAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52D7B8>"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ms = MurckoScaffold.GetScaffoldForMol(mol=m)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c1ncc2nc[nH]c2n1\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAUbUlEQVR4nO3de3BUhdnH8R+BEG5pMDANCNhSLgYM1eEWsFawRS0aiiNCneIGiiGIM4Cm1ntJrFLSajGRqRipDgGtCEhtUNOKr1pinSBCRVQkYJFBoCBSJU24hPC8f5y2lhY0geyePMn3M5NRN+fseRKGr2f3XLaFmZkAwIG4sAcAgLoiWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWHBr5EipRQvp9ddPfLxLF+nZZ8OYCNFGsOBap07SLbeEPQVihWDBtawsaft26Zlnwp4EsUCw4FqHDlJurnTHHVJNTdjTINoIFtzLypJatpSKisKeBNFGsOBeq1ZSfr50zz3SwYNhT4NoIlhoEsaOlfr1C8KFpotgOVHXQ/gjR0p33/2/63fvLj3xRBQHbATuv18qLJQqK8OeBNFCsBxpzofwzaQlS6SrrpKOHz/5Munp0pVXStXVJz5eVSWNGiWVlkZ9TEQZwXKkuR7CX7tWGj5cmjlTGjEiiJckvfrq/+5NLlsWfP+qqz5/rEWLIGZXXx0EbcuWWE2OhkawHGluh/B375amTZMuukjq21d6/33p5puDI4L10a6dNGeOVFER7KUOGCDNmiV9+mlUxkYUESxn6nIIf948qXPnE7/27IndjGfq6NHgvah+/YK9oXXrpMWLpZSUM3veHj2C53nxRelPf5J69Qq2U1vbMHMj+giWM3U5hD91qvTWWyd+ffWrsZvxTKxaFYTqgQek+fOlV16RLrigYbcxcqS0YYP04IPSz38uDR4cBAyNH8Fy6MsO4ScmBkcF//Orvi+jYm3zZmn0aOnaa6VIJHj5lpkZvP8UDXFxwfNv2SJdfrl02WXSmDHBe4RovAiWU03lEP6BA8H7SeefH4T2vfekvDypbdvYbL9jxyD8mzYFb9anpgbzeP+9NlUEy6lTHcL3oqamRo88sk+9e0vl5cFLsmXLpK99LZx5+vaVnnsuOAJbWir17y+tWPGJ7F+HJNEotDD+RJqlo0el1q3D2fYf/vAH5eTk6OyzL1Ak8ltFIsFLtLqtK/3610FYojX/0aPSQw9JixePV2LibhUUFGjIkCHR2RjqhT2sZio/X/r614OXP6+99vm5TdG0detWTZgwQWPHjtWll16qlSsf0aRJdYtVRYWUkRGcX5WWFt0je61bByfovvLKIxo8eLAuvPBCTZgwQTt37ozeRlE3hmbpk0/MiovNMjLMWrc269HDLDvbrKTErKamYbdVWVlpubm5lpCQYBkZGbZt27Z6rGuWm2uWkBDM+sEHDTtbXWzYsMEuvvhia9++veXm5tqhQ4diPwTMzIxgwf7+d7Nly8wiEbP27c06dQr+vaTE7OjR03/e2tpaKy4utpSUFEtNTbXS0tJ6rBsENSXFLDXVrB6rRk1JSYn17NnTev
ToYcXFxWGP0ywRrEaqqir4SxtrBw+aLV1qNn58EK8rrvg/mzJlij3//PN2+PDhOj9PeXm5paenW3JyshUUFFhNPXbbysvN0tPNkpPNCgoafo/vTFRXV1t+fr4lJibaJZdcYhs3bgx7pGaFYDVS11xjlpMT7gzV1Wa///3LNnHiREtKSrKkpCSbOHGirVy50qqrq0+6zs6dOy0SiVh8fLxlZ2fbxx9/XOft7dwZ7NnFxwcvT/fta6ifpOF99NFHFolErGXLlhaJRGzv3r1hj9QsEKxG6JFHzDp0MNuyJexJPnfs2DErKyuzmTNnWkpKirVt29YyMjKsuLjYPvvsM6uqqrL8/Hzr0KGDffe737W33367zs9dVVVls2fPtgsueNMuu8zs3Xej+IM0sDfeeMOGDx9uZ511luXn59uRI0fCHqlJI1iNzLvvmrVrZ/bEE2FPcmo1NTW2evVqmzZtmqWkpFibNm2sd+/e1qdPHyspKanz8xw/ftyefPJJ6969u/Xp08deeGFNFKeOntraWnvsscesS5cuNm7cODt6Jm/84QsRrEbk8GGz8883mzw57Enqrra21ubOnWvJycn12rtYv369XXTRRf8+8laf98caqz179pgk27x5c9ijNFmch9WI/PjHwc3mHnoo7EnqLi4uTmlpaYqPj1frOpzJuX//fs2aNUvp6enq2bOntm3bpry8PCUkJMRg2ujq0KFD2CM0ea3CHgCBF16QFi6U1qwJrqlrampqavTwww9r9uzZSk1NVVlZmYYNGxb2WPW2ceNGVVdXa/jw4WGP0iwRrEZg1y5p0iRp7tzgGsGmZtWqVbr55pt1+PBhzZ8/X5FIRC2idRuGKFuyZIk++ugjghUSXhKG7Pjx4DYngwYFd9NsaoqLi3XttdcqEomooqJCmZmZbmOF8LGHFbI5c6R33pE2bozevZ/CNGHCBH3nO99Rjx49wh4FTQDBCtEbb0j33SetXBl8XFdT1LZtW2KFBsNLwpB8+mmlIpHDmjUruK8VgC9HsEKSnX29+vW7QXPmhD0J4AcvCUOwcOFClZaWav369YqPD3sawA/2sGJs69atysnJ0YIFC9S3b9+wxwFcIVgxdOTIEU2YMEFXX321rrvuurDHAdwhWDH0k5/8RJWVlZo/f37YowAu8R5WjJSWlqqoqEhr1qzRV77ylbDHwWlKTOyu5OSTXzPZokVLpaYOVFxcmxhP1XwQrBjYtWuXMjMzNWfOHKU3xWtvmpHKypt04MDJv2fWVu+/v17Hj8d2puaEl4RRdvz4cU2aNEkDBw5UTk5O2OMArhGsKJs7d642bdqkRYsWKe4/Ps/q2LFjWrFiRYiTAf7wkjCK1q1bp5/97Gd65pln1LVr1xO+t2PHDk2ePFmHDh1SJBIJaULAF/awoigrK0s33nijMjIy/ud7vXr1UkFBgW688UZt2bIlhOkAfwhWiLKysjRmzBhNnDhRR48eDXscoNEjWFH0m9/8Rg8//LCee+65Uy6zYMECHThwQHfffXcMJwN8IlhRNGTIEM2ePVvXX3+99uzZc9JlkpKStHTpUhUWFur555+P8YSALwQryu644w4NGDBAkydP1vFTnKAzdOhQ/fSnP9WUKVP0t7/9LcYTAn4QrCiLi4tTcXGxNmzYoHnz5p1yuTvvvPPfYTOzGE4I+EGwYqBbt25avHix7rrrLq1du/aky/wrbOvXr//CsCFcbdtK7dqFPUXzRbBiZPTo0Zo2bZomTpyogwcPnnSZbt266bHHHlNpabrWr4/xgKiTe++VHn887CmaL4IVQ/fff78SExM1Y8aMUy7z/e9/X/36XaQf/EA6RdeAZotgxVBCQoKWLVumlStX6oknnjjlcg88EHyY6syZMRwOZ6xVK+nSS6X27cOepOkiWDHWp08fzZs3T9OnT1dFRcVJl0lIkJ58Ulq+XPqCrqGRadNGevFFiQ8Jih6CFYKpU6dq9OjRuvXWRaqpOfky/ftLDz4oTZ
8unaJrQLNDsELy6KOPafPmObrrrlMvk50tZWRIEydKXLkDEKzQdOyYqCVLWqiwUPqiE9wXLJD275dmz47dbEBjRbBCNHSodPfd0pQp0qlOcO/YUXr6aamgQFq9OpbTAY0PwQrZXXdJaWnS5MnSqU5wHzo0WG7SpGBvy5sZM2bo9ttv1z/+8Y+wR4mKkSOlFi2k118/8fEuXaRnnw1joqaLYIUsLk5avFhavz54k/1U7rxTmjNHOuus2M3WUEaNGqUVK1bovPPO09NPP90kLz3q1Em65Zawp2j6CFYj0K2bVFws3XGHdIord9SypfSjHwX/9Gbs2LHavHmzcnJylJ2drWHDhqm8vDzssRpUVpa0fbv0zDNhT9K0EaxG4oorpKlTpeuukyorw56m4cXHx2vWrFn64IMPNGzYMH37299WZmZmk7k7RYcOUm5u8D+dU52qgjNHsBqRX/0qOEva0xnutbW1euedd1RTU1Onu6Z27txZhYWFWrt2rbZv367evXsrLy9PR44cicG00ZWVFewBFxWFPUnTRbAakYQE6be/lZYtC850b6yOHTum1atX64YbblC3bt10zz33KDk5WWlpaVq1alWdnmPgwIEqKyvTU089pUWLFiktLU3Lly+P8uTR1aqVlJ8v3XMP14FGC8FqZPr3l+bNk264oXGd4V5bW6vXXntNs2bNUvfu3TV27Fjt2rVLv/zlL7V3715t3LhR119/vX74wx9q1KhR2rRpU52ed8yYMXrvvfeUlZWlKVOm1GvdxmjsWKlfvyBciAJDo3TNNWY5OeHOcOjQISspKbFIJGJJSUnWrl07y8jIsOLiYqusrDzpOjt37rRIJGLx8fGWnZ1t+/btq/P2zmTdMI0YYXbvvZ//d3m5Wbt2wdfvfhfWVE0TwWqkqqrMamvD2G7VvyOVmJhoycnJFolErKSkxA4fPlzn5ykvL7dhw4ZZcnKyFRQUWE1NTb3WTU9PP611o6262mzJkhMf++9gmZmNH28mEayGRrBgBw6YLVpklp0929q0aWNdunSx6dOn20svvXRGsaitrbXi4mJLSUmx1NRUe+GFF0573dLS0tOeoyEcP262dKnZOeeY9epl9vHHoY7TbBGsZmrfPrNHHzW7/HKz+HizHj3M7r23zNasWWO1DbxrV1lZabm5uZaQkGAZGRm2bdu2mKzbUDZsMLv4YrP27c1yc80OHYr5CPgngtWMfPyxWXGxWUZGEKmvfc1s5kyzsrJgDyLaKioqbPz48da6dWubOXOmffbZZzFZ93Tt3x/8flq1MotEzPbsifom8SUIlhMjRgTvifz5zyc+npLyxe+T7NhhVlBg9q1vmcXFmX3jG7GN1Mm89NJLlpaWZl27drWioqJ67dGdybp1dfRo8DtLSjIbMsTs9dcbfBM4TQTLiREjzDp1Mhs+/MTHTxas7ds/j1SLFmb9+wcvZd58Mzaz1kVNTY0VFRVZ586dbdCgQVZWVhaTdb/M6tXB7+vss4O90bCijpMjWE6MGGF2221mXbqYrVjx+eP/Haz77gsiNXSo2S9+YRbCWz718sknn9jMmTMtPj7exo8fbx9++GFM1v1vW7aYXXmlWdu2we/54MHTfipEESeOOlKX69UyM6UPPwwuor71VqlXr5iOWG/JyckqLCzU22+/rcrKSvXv3195eXk6dOhQndd98803tX//fg0dOlSHDx+u1/Y//VS6/XZpwIDgFjHvvhuc9JmYeJo/EKKKYDnzZder9eghnXNObGdqCKmpqSotLdXSpUu1ZMkS9e3bV4sXL67TrWi++c1v6uWXX9aaNWvUpk2bOm2vtrZWCxYs0LXXLtEf/xjcHHHVKqlnzzP9SRBNBMuZpn692pgxY7R582bdcsstmjFjhi655BK99dZbdVr33HPPrdNyL7/8sgYOHKi8vDyNG3dE69dLF198BkMjZgiWQ039erXWrVtr1qxZev/993XuuedqyJAhyszM1N69e8/oeXfu3K
nMzEx973vf08iRI1VRUaGpU7MUx98CN/ijcur++6XCwqZ576x/6dq1q4qKilReXq6//vWvp30rmqqqKuXl5alv377as2eP/vKXv6iwsFBJSUlRmhzRQrCcSk+XrrxSqq4Oe5LoGzRokMrKyvT4449r0aJFGjBgQJ1uRWNmWr58ufr376+nnnpKy5cv1+rVq3XeeefFYGpERdiHKYH6qKqqstzcXGvTpo2NGjXKNm3adNLl1q1bZxdeeKF17NjR8vPz63XhNhov9rDgSrt27ZSXl6etW7eqa9euGjhwoKZNm6b9//w4od27d2vatGkaPny4evXqpS1btui2225TQkJCyJOjIbQwa4IfYYJm49VXX9VNN92k3bt3KyMjQ8uWLdPQoUP14IMP6vzzzw97PDQwggX3amtrtXDhQu3YsUODBw/WuHHjwh4JUUKwALjBe1gA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANwgWADcIFgA3CBYANz4f8zphzx5iAzOAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52DB38>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.SanitizeMol(ms)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## With standardization"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cn1cnc2c(=NO)[nH]cnc21\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAcsUlEQVR4nO3de1hVdb7H8Q8oIIiAgndUIK90FMzrKIgxYEOC6JiZljk5ZHZOkx3zDNppwmrOPDbWANaTQ1NNeBkNc7wApQlHkWRMpbRC0RkgOYJQllqIcf2eP34P3rjIZbPX/u39eT3PflJYG748xbu11l77t+xEREBEpAF7owcgImotBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWGe6nn35CRkYGVq1aheHDhyMqKgr5+flGj0UWiMEiQ5SWluIvf/kLZs+eDS8vL8ydOxeFhYVYvXo1PDw8EBISghMnThg9JlkYOxERo4cg25CXl4e0tDSkpqYiJycHvr6+CAsLQ2RkJO677z44OjoCAEQEK1aswHvvvYf09HRMmTLF4MnJUnQ1egCyXpWVlcjMzERaWhrS09NRVlaGyZMnIyoqComJiRg3blyTz7Ozs0N8fDzc3d0xY8YM7Ny5E+Hh4WaeniwR97DIpAoLgfR0IDNzEfbu3Y4ePXogIiLi+l6Uu7t7m77eK6+8gri4OGzduhVz5szppKlJFwwWdUhdHZCTA6SlqcepU8CYMcDChWkIDu6FSZMmoUuXLh36Hhs2bMDy5cvx9ttv49FHHzXR5KQjHhLaoOnTgaws4PBh4ObTQ/36AX/+MzB7dsvPv3QJyMhQj927gStXgKAg4NFHgTlzgOHDASDSZPM++eSTcHNzw69//WtUV1cjJibGZF+b9MJg2ShPT2DlSrV31BqFhUBqqtqLysoCPDyAX/wCeP114L77ADe3Th0XDz/8MFxdXfHQQw/hhx9+wIoVKzr3G5JF4mUNNiomBigqAnbsaH6b/fuB5cuBoUPVY8sWIDgY+Mc/gPJyYONGYN68jsfq1KlT+OMf/4iQkBCkp6c3u110dDT+/ve/4/nnn8eqVas69k1JS9zDslGurkBcHLB6NTBrFuDg0HibVauAXr2A3/wGeOABYOBA03zvqqoqZGVlXX/18Ouvv8b48eMRGRkJf3//Fp8bERGBvXv3IioqCpWVlUhMTISdnZ1pBiOLx2DZsJgYIDERSEoCnnqq8edzc033vS5evIgDBw4gNTUVe/bsQU1NDUJDQxEbG4uZM2diYBtqOG3aNOzfvx/Ll/8OzzxzDfHxLrDnsYJNYLBsWNeuwNq1KlymfvFNBPj8cyAj4x188EEScnNz4ePjg8jISGzfvh3Tpk2Dk5NTu7/+xIkT8c47+xAeDnz7LZCc3PReIlkX/n/JxkVHA6NGqXB1VGUlsGcPsHQp4O0NTJoEnDnTFw888AC+/PJLFBQUIDExEeHh4R2KVQN/f+CTT4AjR4C5c4Gffur4z0CWjXtYhHXrgNDQ9j333Dlg3z51icNHHwGOjsDPf67Oj0VHA337RsKUlzjcztcXOH
gQCAsD7r9fBdPVtdO+HRmMwSJMmgTMnAls337nbevr1aFewyUOn32mohEZCezcCYSEmP/QbPBgIDsbmDEDiIhQc7XxgnrSBK90pzaZMQM4dEiFKTJShc7Pz+iplEuX1F5WdTWwdy/Qu7fRE5GpMVjUJmfOqMsbLPWwq6JCXalfWqquIzPVpRhkGRgssjqVleokfH6+Ord2111GT0SmwlcJyeq4uKj3OI4fr67Mz8szeiIyFQaLrJKjI7Btm3q/Y2gowMVLrQODRVarSxfg7bfV24qysoyehkyB57Bs0NmzarUGT0+jJyFqG+5h2aDf/AZYv97oKYwzfTpgZ9d4aZ1+/YBdu27d7vnnGz/f2xvYvLkTB6RmMVg2prZWLQ8THGz0JMZqWA+M9MJg2ZgTJ4Br19TV7basNeuBkeVhsGxMdjZwzz1Ajx5GT2Ksm9cDq6kxehpqLQbLxmRn83CwQUyMeiUxKan5bf70J8DL69bHhQvmm5FuxWDZEBF14wkGS2lYD+zFF4Effmh6m8cfV4fRNz/69DHfjHQrrtZgQ/Lz1WJ3vJHyDdHRwGuvNb8eWI8e6lXBm3XwrmXUAQyWDcnOVovecRWDW3VkPTAyLx4S2hCev2paw3pglZVGT0J3wivdbYiPD/CHPwALFxo9CVH7cA/LRpw/r5YzDgoyehKi9mOwbERWltrDGjzY6EmI2o/BshE8f0XWgMGyEWfPzsSMGQeNHoOoQ3jS3QZcvHgRffr0wenTpzFixAijxyFqN+5h2YDs7Gx4eXlh+PDhRo9iEb74Qt3HsL7e6EmorRgsG5CdnY1p06bBzs7O6FEswoEDwOXLgD3/69cO/5XZgOzsbATzjPt12dnAtGlGT0HtwWBZuYqKCpw4cYLBuklODl8x1RWDZeVycnLg4uKCgIAAo0exCGfPAmVlwNSpRk9C7cFgWbns7GxMmTIFXbjEAADg0CFgxAguEaMrBsvK8fzVrXj+Sm8MlhWrrq7GsWPHGKyb8Ip/vTFYVuzYsWOoq6vDhAkTjB7FIpSUqBtPMFj6YrCs2KFDhzBx4kR069bN6FEswqFDavXQIUOMnoTai8GyYg0XjJKSnQ2EhBg9BXUEg2WFqqqqEB8fj+LiYtTV1Rk9jsXg+Sv9WVWwbP2Xs76+Hlu2bMHIkSMRHx+P0NBQxMfHIyEhwejRDPf998CpUwyW7qwqWL/73e8QFBSE1NRUo0cxu4yMDEyYMAHLli3D4sWLcebMGaxfvx47d+7Ec889h1WrVhk9oqGOHv0nQkLKMWqU0ZNQR1hVsJYuXYpx48Zh/vz5GDt2LDZu3Gj1e115eXl48MEHMXPmTIwfPx4FBQVYs2YNnJ2dAQARERHYu3cvNmzYgKeffhq2uppQZuZbcHdfBr7/W3NihcrLyyUuLk48PDzk7rvvluTkZKmpqTF6LJMqLi6WpUuXSteuXWXevHlSUFDQ4vbHjx8XLy8veeKJJ6Surs5MU1qOSZMmyWuvvWb0GNRBVhmsBleuXJGEhATp16+f+Pj4SEJCglRWVho9Vod89913EhsbK926dZOwsDDJzc1t9XPz8vJkwIABsmDBAqmuru7EKS3L1atXxdHRUY4ePWr0KNRBVh2sBhUVFZKQkCDe3t7Sp08fiYuLk8uXLxs9VptUVVVJQkLC9b3G1NTUdn2dwsJC8fPzk8jISLl27ZqJp7RMGRkZ4urqalORtlY2EawGVVVVkpycLCNGjBA3NzeJjY2VixcvGj1Wi+rqRJKTq8XPb6T4+PjI5s2bO3xId+7cORk2bJjce++98uOPP5poUssVFxcn4eHhRo9BJmBTwWpQW1srW7ZskdGjR4urq6u89NI2KSkxeqrGPvxQZPRoEU9PkbfeOio//fRTq5736quvyoULF1rcpqysTMaMGSNBQUHa7W22VWhoqLz00ktGj0EmYFWvErZWly5dsHDhQpw8eRJbtmzBqVPT4OcHLFsGFBYaPR1w/Djw858Ds2cD99
4L/OtfwOOPT4CTk9Mdn1tdXY19+/Zh2rRpKC4ubna7vn374uDBg6itrUVoaCi+/fZbE/4ElqOmpgZHjhzhG8CthE0Gq4GdnR1mzZqFrVv7IyNDvTl2+HAgKgrIzTX/PMXFwBNPAD/7GeDpCeTnA4mJgIdH67+Go6Mj0tLSMGbMGAQHB+Ps2bPNbtuzZ0/s378fPXv2REhICEpKSjr+Q1iY3Nxc1NbWYuLEiUaPQiZg08G6WVAQkJqq9m569gQmT1bh+sc/Ov97f/cdsGqVimVhIXDsGJCSAvj6tu/rOTo64v3330doaCimTZuGkydPNrutq6sr9uzZgyFDhiAoKAgFBQXt/CksU3Z2NsaPHw8XFxejRyETYLBuExgIbNwInD4NDBigFntriJmpVVYCr7wC3HUXkJkJfPQRsH+/mqGjunTpgnfffRfz58/HvffeiyNHjjS7rYuLC3bv3o3x48cjODgYX331VccHsBBcwNDKGH0SzdIVFYk8/bSIs7NIYKBIcrJIbW3HvmZdnUhKisiQISKDB4skJamPdYb6+npZuXKldO/eXTIyMlrctra2VpYsWSJ9+vSRzz//vHMGMqP6+nrp1auXpKWlGT0KmQiD1Url5SJxcSIeHiJ3363CdfPF8yEhIoDI4cO3Pq9vX5GdO2/8/eBBEX9/kd69RdavF6mqMsPwIrJ27VpxcnKSXbt2tbhdfX29PPPMM+Lh4SGHb/9hNPPFF1+Ivb29XLp0yehRyEQYrDb67juRF19UlxqsWXPj4yEh6mM/+9mt298erAMHRP77v0WuXDHDsLd54403xMnJSVJSUu64bVxcnHTv3l0+/vhjM0xmeufOnZNFixbJyJEjjR6FTIjBaqcffxS5+X/cISEisbEi/fqJfPDBjY/fHiyjbdy4UZycnOSdd96547YNe2U7LekHaEZdXZ3k5OTIc889JwEBAQJAhg4dKo6OjpKYmGj0eGQiDJaJhISIvPyyyIYNIsOGiTS8C8TSgiUism3bNnF0dJT4+Pg7bvvmm2+Kg4ODbNy4sfMHa6OKigrZs2ePLF26VPr37y9du3aVqVOnytq1a+X06dMiIpKeni7Ozs4SGxtr8LRkCgyWiTQEq6ZGZORIkddfVx+3xGCJiKSlpYmzs7Osufm4thmbNm0SJycnefvtt80wWcsKCgokKSlJIiMjxdHRUby8vGTevHmSnJzc7BX7Bw8elB49ejBaVoDBMpGGYImI7Nol4uWlzlNZarBE2vaLvGvXLunWrZvZl2ipra2V7OxsiY2NlXHjxgkA8ff3l9jYWMnOzm71+yqPHTsmnp6esmzZMptcXsdaMFgmcnOwRESCg0VWr7bsYImIHD16VDw9PeXJJ5+84y+yuQ6vLl68KCkpKbJo0SLx8PAQZ2dnCQsLk4SEBCkuLm731/3qq6+kf//+snDhQqtbH81WMFgmcnuwjhwRcXFRD0sOlojIZ599Jr1795aHH374jr/IWVlZ4ubmJr/97W+lvr7eZDMUFBRIQkKChIWFiYODg/Tt21cWLVokKSkpJl1RIj8/XwYNGiSzZs2ymeV1rAmDZSK3B0tEZN48dW2WpQdLROT06dPi7e0t0dHRd1wVouHwqiOrl167dk32798vsbGxMnz4cLG3t5dx48ZJXFycHD9+3KQxvN3XX38tQ4cOlYiICO0XdLQ1DBZd15Zf5IbVS9tyePXNN99IcnKyzJs3T9zc3MTFxUUiIyMlKSlJSktLTfEjyPnz5yU6OlrKyspa3O7ChQsyevRoCQ4OlitGXBRH7WInYqN3JaAmlZWVYcaMGejZsydSU1Ph5ubW7LZnzpxBeHg4AgMDkZKS0uQdpvPy8pCWlobU1FTk5OTAx8cH4eHhiIyMxIwZM1q1ZE5bVFZWYs6cOTh37hz279+PQYMGNbvtpUuXEBERgdraWuzduxdeXl4mnYVMj8GiRr7//ntERESgrq7ujr/IxcXFCAsLg7e3N/bs2QN7e3vk5O
QgNTUVO3bsQFlZGSZPnoyoqCiEhYVh3LhxnT5/dXU1FixYgOPHjyMjIwPDhg1rdtsrV64gMjIS1dUO2L07E/368bY6Fs3YHTyyVJcvX5apU6eKv7+/lNxhOdbS0lIZNWqUjB07VpycnMTT01MeeeQR2bZtm2Hv46utrZXFixdL37595eTJky1uW1FRIUuWlMiwYSLnzplpwM6WmSkyfbqIq6t65/6UKSK33wcgJES9T+x2AweKbNpkljHbisvLUJPc3d3x8ccfw9vbG0FBQSgqKmp22/79+yM8PBx1dXXIzMxEeXk5Nm3ahPnz58OjLasPmlDD8jqzZs3C9OnT8emnnza7bffu3bFhwwCMGaPuDN3Cmod62LEDiIgAZsxQq0B+/TWwYAHw4IPAu+8aPV3HGF1MsmxVVVXyy1/+UgYPHixnz55tcpvLly+Lu7u77Nixw8zT3Vl9fb08++yz4urqKpmZmS1uW1sr8qtfqWvnTpww04CmVlur9pCefbbx537/e7XcyNWr6u/cw6KbJScDL79s9BQd4+joiG3btmHq1KlYt25dk9usX78e/fv3x+zZs807XCvY2dnh1VdfxfPPP4+oqCjs27ev2W27dFE7IPPnq7X0W1jz0HJ99ZVa63vRosafe+QR4PJloIW9TYtndDGt2RtviNxzj9FTmEZdXZ1UNbF419WrV6V3796SnJxswFRt8/rrr4uTk5N8cPNyGk2orxdZuVKke3eRO6x5aHkyMtTFf99+2/hz1dXqc9u2qb+HhKjzW56etz7s7S12D6ur0cG0ZoGBQF4eUFMDODgYPU3H2Nvbw9HRsdHHk5KS4OzsjAULFhgwVds89dRTcHBwwIYNvXDtmtrhaIqdHbBuHeDlBcycCbz/PhAdbd5Z263hFd2Skht/blBaqv7Zu/eNjz3+OPBf/3XrdhMmdN58HcRgdaKAABWr/Hxg9GijpzG9mpoaJCQkIDY2Fg6aFPmJJ56Amxvwq1+pm38sX978trGxgKurOkTcvBl44AGzjdk2lZVAw002/u3fgIEDgU2b1H+AN9u8Wd2CafLkGx/r0QPw9r51uy5dOnXcjmCwOpGrK+DnB5w8aZ3BSk5ORlVVFR577DGjR2mTBQvU7+m8ecCVK8ALLzS/7X/8B+DmpvbGfvgBWLLEfHO2KC8PSEsDMjLUn8+fB+ztVWwSE4GFC9W94hYvVrv3KSnA//wP8MYbN+KmIZ5072SBgSpY1qaurg7r1q3DihUr4OzsbPQ4bRYZCXz4oTr0W7Wq5W0XLQLeew948kkgIcEc0zXh2jUVp+XLgcGD1d7T9u3A1Knqlk52N13wOneu+uH27QOGDQOGDAG2bFHHthZT3HYy+iSatXv5ZZGwMKOnML2tW7eKu7u79re5//RTkV69RP793+9856Ldu0W6dTPj+ejycnW3k3nz1AWg3buLREaq2yxduGCmISwL35rTyVJTgcceAy5eNHoS0xo3bhxmzpyJl156yehROuyzz4Bf/AK47z7gr38FurZwouTwYXV438JbLNutvh44elQd6a05OB1dD2cBI0eq3cGZM9UNMlsazhYYXUxrV1ysXkk+f97oSUwnNTVVXFxc5JtvvjF6FJM5fVpdLzl//o31+Fujtbd3a86VKyLbt4ssXqxu/eboqPbIS/6SLvKvf7XhJ7ANPIfVyQYNUq8unzhh9CSm8+ab72Dp0qXoffPL45obORL45BPg+HFgzhx1yqi1PD2BlStbv31REfDWW0BUFNCnjzo3VlkJvPoq8M036u7fA2LuV7cEp1swWGYwZoz1nHjPygI++WQbVq5cY/QoJufjAxw8qC5F+f771j8vJkZFaMeO5rc5elRd7jRypHrleMMGdd48KwsoL1cv4j36KODu3tGfwrrZ+AGxeVjTK4V/+APw0ENOGDjQtOtYWQpvb/XiWlu4ugJxccDq1cCsWU1fJPzhh+p6vP/8T3U66vZLn6h1GCwzCAhQJ1J19/nnQG
YmcPq00ZNYnpgYdflTUhLw1FONP79mjdlHsko8JDSDgACgoqIEFRXVRo/SIb//vVqhpIX18GxW167A2rXAiy+qC0ypczBYZuDvX4OLF/3w5Ze5Ro/Sbvn5wO7djd92RjdERwOjRqlwUedgsMzAwcEB/v7+OKnxiay1a4H77wfGjjV6Esu2bp06NPzxR6MnsU4MlpkEBgZqG6z/+z9g61b1ZmBq2aRJ6qR6ZaXRk1gnnnQ3k4CAALz//vtGj9Eur7wCTJmi3rZGtzp4sPHHUlLMPobN4B6WmQQEBODLL79EfX290aO0SXm5WoVz9WqjJyFisMxm7NixqKysxD//+c9Gn/vBgl9Wio9XFzuGhxs9CREPCc3Gw8MDgwYNwokTJzBixIhbPhcYGIjS0lIMHDgQfn5+jR7Dhw9Hjx49zD7zlSvAn/+s9rDseLs+sgAMlhk1nHifP3/+LR8/cOAACgsLUVRUdP2fu3fvRmFhIcrLywEA/fr1g5+fH3x9fREQMAdeXnPh6wv4+qqrpjtjkci//hUYMACwwHtLkI3i8jJm9MILLyA3Nxfp6emtfk5lZWWjmNXWTkdW1mwUFQFXrwKOjmpNNz8/XI/YzX/29GzfvHV1aiHLIUPa93wiU2OwzGjHjh14+umnUVJSYrKvWV6u3nhbVAQUFt765/Pngdpa9Yba//1f4J57TPZtiQzBYJlRQUEBhg4divLycvTp06fTv19NDVBcrAI2ebJ6ky6RzhgsMxIReHh44IMPPkA4X3YjajNe1mBGdnZ2GDNmDHJz9X1PIZGRuIdlZn/729+wZMkSuLi4NHkJg5+fHwYPHoyuZli7e/p0tYDc4cPqSvYG/fqpyxn46iBZGl7WYGYLFixAYGDg9Vf8ioqKkJ+fj48++giFhYWoqKiAg4MDBg8eDF9fX/j6+l6/nEH9+S54ebXzZb8mNCzvm5Njsi9J1GkYLDOzs7ODv78//P39m/z8pUuXUFhYeMsjMzMTpaWlKCoqwujRz+GLL57HgAHq0oXbHyNGtO3kekwMkJyslvedO9dEPyRRJ+EhoUZqa2tx/nwFCgs9Gl3CUFSkbmAAAP3737gO6/Z/DhyobhAMqEPCsDB1k4w//UndQNjBgYeEZLm4h6WRrl27wsfHAz4+QGho489XVQElJSpgDY/Tp4H0dODsWbVGU3KyutnBze60vC+RpWCwrIiT041Dw6aUlQEuLo0/3rC8b0xM45gRWRJe1mBD+vVr/o7FXN6XdMA9LLpu3bqmDzWJLAX3sOg6Lu9Llo6vEhKRNriHRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWmDwSIibTBYRKQNBouItMFgEZE2GCwi0gaDRUTaYLCISBsMFhFpg8EiIm0wWESkDQaLiLTBYBGRNhgsItIGg0VE2mCwiEgbDBYRaYPBIiJtMFhEpA0Gi4i0wWARkTYYLCLSBoNFRNpgsIhIGwwWEWnj/wHFiVrYXgSPsQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE5155C0>"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m_std = standardize(m)\n",
"print(Chem.MolToSmiles(m_std))\n",
"Draw.MolToImage(m_std)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N=c1[nH]cnc2[nH]cnc12\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAYiUlEQVR4nO3de3BUZZ7/8U9DuCeRW7i4EgpEQQQBuakJIVxi0kGBFRlnLcEBIWw56txckXJ+sli1tWGpZbR2t5zszkwNbkYQlhVGUakAkpCIlIAIOHhLEDAhBHAShERCkuf3x1kukW7Cpfuc8yTvVxUVi9NJf2eqfHtO55znCRhjjADAAq28HgAArhbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbB8pHUVCkQkD74oPHf9+olrVvnxUSAvxAsn+nWTXr2Wa+nAPyJYPnMvHnSwYPS2rVeTwL4D8HymdhYafFiadEi6dw5r6cB/IVg+dC8eVLr1lJOjteTAP5CsHwoJkbKzpaWLJFOnfJ6GsA/CJZPTZsm3XGHEy4AjhivB0B4y5ZJEyd6PQXgH5xh+djYsdKUKVJ1tdeTAP4QMMYYr4cAgKvBGRYAaxAsANYgWACsQbAAWINgWej556X/+A+vpwDcR7As1Lat9L//6/UUgPsIlg8ZIz32mPTRR6GPB4PStm1SVZW7cwFeI1g+FAhIR49Kb70V+vjYsVKXLtLmze7OBXiNYPlUMCi9807oY61aSWlp0rvvujsT4DWC5VOZmdLu3VJZWejjwaC0YYNz+Qi0FATLpwYPlvr1kzZuDH08GJQqKqRPPnF3LsBLBMvHMjLCX/Z17SqNGRP+shFojgiWjwWDzhlWuKWSg0E+x0LLQrB8bNIkJ1Y/3PbrvMxMaft26eRJd+cCvEKwfKxDB2n8+PBnUXffLfXsKW3a5O5cfsW+js0fwfK5K93eEAhI6elcFl6KfR2bN4Llcw88IO3bJx06FPr4+aA1NLg7l1+xr2PzRrB8rn9/6fbbpffeC308LU0aMmSddu3a6+5gPsW+js0bwbJAZmb4y77OnaX6+t/onXfedHUmP2Nfx+aLYFngwQdPqbp6nc6ePRvyeDAY1Lt8kHUB+zo2XwTLAklJ7bR9+ywVFBSEPD5lyhR99NFHqqiocHky/2Jfx+aJYFmgXbt2mjhxYtizqKFDh6pPnz7aGO45nhZq2TLplVek777zehJECsGyRDAY1DtXeA4nIyODy8IfYF/H5od9CS1x5MgRJSYm6ssvv9SAAQMuO75+/XrNmTNHFRUViolp/ht6nz4t/e530s9+5tyPhpaBMyxL9OnTR0OGDAl7FjV58mRVV1drx44dLk/mvoICadgw6fe/d1asuBY1Nc5D5Vu3RmU0RBnBskhmZmbYYHXq1EkpKSnN+rLw+++dDTgmTXJuqN2503k06Vq0by9NmOA8IfCP/8h6YrYhWBYJBoN6//33VR3mQ5mmPuey2Y4d0ogR0po1ztLQr7witWt37T8nEJAWLnSeLfy3f5OmT5cqKyM8LKKGYFkkOTlZ7du31/vvvx/yeGZmpvbs2aPS0lKXJ4uec+ekpUullBTnz969ztcbFQxKe/ZI5eXOumL79t34z0T0ESyLxMTEaPLkyWEv+wYOHKhbb7212V
wW7tvn/Kbv1VedR5NycqROnSL38/v0cT4PS0113uePf4zcz0Z0ECzLNHXZ1xzueq+rc86qRo2ShgxxwjVhQnTeq1076T//U/rtb6Unn5QWLJBqa6PzXrhxBMsymZmZ+vrrr3XgwIGQx3/0ox/prrvucnmqyPnLX6R775Vefln6n/+RXntNiouL/vvOni0VFUl5eVJysnT4cPTfE9eOYFmmV69eGjFixIWzrKNHj+qJJ57Q8OHD9fOf/1wnTpzQU0895fGU166+vl7Ll9dq5EjnkZoDB6QHH4zsezT1XOGIEc7mtd26OWd3LIzoPwTLQnPmzFHHjh114sQJpaWlqaSkRI8//ri++eYbLViwQAkJCbrrrrv0zDPPaO3atTp+/LjXI1/Rl19+qZSUFG3Z8pJWrXLOqjp3jux7fPSRs1RPU79E7dbNec2vfuXcJb90Kbc++IqBlSorK83IkSNNUlKSOX36dKNjxcXFJicnx8yaNcskJiYaSaZ///4mKyvLrFixwhw+fNijqRtraGgwOTk5JjY21sycOdMcP348qu+Xk2NM27bGLFxoTF1d06/fsMGYvn3rzdy5/8+cOnUqqrPh6vBojoXOnDmjjIwMnTt3Tnl5eYpr4kOekpISFRYWqqioSBs3btShQ4fUv39/JSUlKTk5Wenp6erbt69L0zsOHTqkuXPnavfu3Vq6dKmysrJced9t26RHHpEGDZJWrZJ69Ljy6w8e/EYzZkxVTU2N1q5dq8GDB7syJ8Lwupi4NtXV1SY1NdUMGzbMnDx58rp+RnFxsVmxYoXJysoy/fr1u3AGNmvWLJOTk2NKSkoiPHVjK1asMHFxcSYYDJpvvvkmqu8VyrFjxkyYYEyfPsZ8+GHTr6+pqTHz5883sbGxZuXKldEfEGERLIucPXvWZGZmmoEDB5ry8vKI/dzS0lKzevVqk5WVZQYPHmwkmd69e5uZM2eanJwcs3///oi8z9GjR83UqVNNfHy8ycnJicjPvF7nzjmXhu3bG/Pyy1f3PStWrDAdOnQwWVlZ5uzZs9EdECERLEvU1taaqVOnmgEDBpiysrKovldZWVnEA7Z69WrTrVs3k5aW5pvP0Iwx5s03jbnpJmNmzTLmzJmmX797927Tr18/k5SUZEpLS6M9Hn6AYFmgrq7O/PjHPzaJiYnm4MGDrr//0aNHzerVq80zzzxjRo4caQKBgOnVq5eZOXOmefnll83OnTtNQ0NDyO89duyYeeihh0zHjh1Ndna2qa+vd3n6pn32mTF33mnM8OHGFBc3/foTJ06YjIwMk5CQYDZt2hT9AXEBwfK5+npj5s79qbnllltM8dX82+SC8vLyywLWs2fPywK2Zs0ak5CQYO677z7zxRdfeD32FVVVGTNjhjEZGW+aDRs2NPn6hoYGk52dbdq2bWuys7PDBhuRRbB8rKHBmKwsY1JTj5vPPvvM63HCKi8vN2+88Yb56U9/au68804TCATMzTffbNq3b29+85vf+PKsKpSGBmOWLVtm2rZta1588cWrmnvDhg2mS5cuZtq0aaaystKFKVs2bmvwseeek/7rv6QtW5y7sG1x/Phx/eIXv1BZWZm2bNni9TjXbNu2bXrkkUc0aNAgrVq1Sj2auPfh8OHDevjhh1VVVaW1a9dqyJAhLk3a8nCnu0+98ILzUG5enl2xkqSEhATNnz9fH3zwgU6fPh3yNfv27dPTTz/t8mRXZ9y4cdqzZ48kadSoUU2u4pqYmKj8/Hzde++9CgaDquXp6ejx+hQPl3vpJWM6djSmoMDrSa7fuXPnTOfOnc1bb70V8nhxcbGRZA4dOuTyZFfv3LlzZuHChaZ9+/bm5au496GkpMQEAgHff15nM86wfOaVV6R//mfp7belceO8nub6xcTEKC0tLexSN/3799eAAQN8vTVZTEyMsrOztXLlSi1evFizZ88Ou9qrJOXm5mrMmDG67bbbXJyyZSFYPvL73zvL965ZE731n9wUDAb19t
tvhz1+pTXq/WT69OnasWOHdu/eraSkJJWUlIR83euvv67HHnvM5elaGK9P8eD44x+NadPGmNWrvZ4kco4dO2ZatWplPv3005DH3333XRMbG2u+//57lye7PlVVVWbGjBkmMTHxspl37NhhYmJiIvoEAi7HGZYPrF0rZWVJK1ZIM2d6PU3k9OjRo9HaXT804f9OIwsLC90c67rFx8drzZo12rhxo9r9YAeM3NxcBYNB9bzWbXxwTQiWx2pqpGefdX4j+Hd/5/U0kXely7527dopNTXVisvC8wKBgAYNGtTo7+rq6rR69WouB11AsDzWoYO0f7/0+ONeTxIdwWBQ27ZtU1VVVdjjtm9NtnHjRtXU1OjBSC+RissQLB+I5E4wfjN27Fh16dJFmzdvDnn8gQce0IEDB1RcXOzyZJGTm5urhx9+WB06dPB6lGaPYCGqWrVqdcXbGxITEzV48GC99957Lk8WGadOndKf//xnLgddQrAQdcFgUBs2bJAJ8xSYLbc3hLJ27Vp16dJFKZHY3RVNIliIumAwqIqKCn3yySdhj2/ZsuWKN2X6VW5urmbNmqXWrVt7PUqLQLAiKDVVCgSkDz5o/Pe9eknr1jV+3a9/ffn333KLlJsbxQE90rVrV40ZMybsh+vjxo1T27ZtlZ+f7/JkN6asrEz5+fl69NFHvR6lxSBYEdatm3ObAhq70o7Ubdq00aRJk6y7LFy//rgeeOAnGjp0qNejtBgEK8LmzZMOHnRuBsVFmZmZ2r59u06ePBnyeFOP8fjRq68OU0rK77weo0UhWBEWGystXiwtWiSdO+f1NP5x9913q2fPnsrLywt5fMqUKfr666/1+eefuzzZ9dmzR/r0U2fLMLiHYEXBvHlS69ZSTk741yxfLnXv3vjP0aPuzei2QCCg9PT0sJd9vXv31rBhw6y5iTQ3V0pLk/7mb7yepGUhWFEQEyNlZ0tLlkinToV+zfz5zn+lL/3T1Kaetjv/OVZDQ0PI47bc3tDQ4GzCyq1X7iNYUTJtmnTHHU64QomLc34reOmf5v6b8fT0dFVWVmrnzp0hjweDQeXn5+u7775zebJrs2mTVFkpTZ/u9SQtD8GKomXLnAX5fP7vn2vi4+N13333hb3su/feexUXF+f7deBzc6WHHnI+r4S7CFYUjR0rTZkiWXg/ZNRc6faG1q1bX/ExHj84c0Z6800uB73Crjlw1b59+zR8+HCVlZWFXDvqtdde0wsvvKDDhw8rEAh4MOGV5eY6uxkdOdL8L+H9iDMsuGro0KHq06dP2LXcg8GgysrKtH//fpcnuzq5udKjjxIrrxAsuC4jIyPsZV9CQoJGjRrly8vCigpp82YuB71EsOC6YDCojRs3qq6uLuxxPwbrT3+Sbr9dGj7c60laLoIF102ePFnV1dVhNyjNzMxUYWGhKisr3R2sCf/939Ls2V5P0bIRLLiuU6dOSklJCXsWNWrUKHXr1k2bNm1yebLwjJEWLOBy0GsEywUVFdK//7vXU/jLldZyb9Wqle6//35fXRYGAk6weBTHWwTLBbW10jPPSF995fUk/pGZmak9e/aotLQ05PHzQeOuG1yKYLngllukIUMkS57rdcXAgQN16623hl3LPT09Xd27d1d5ebnLk8HPCJZLMjMlH13h+MKVbm/o2rWr9u3bp969e7s8FfyMYLkkGJS2buUxnUsFg0Hl5eWptrbW61FgCYLlkqQkqX17yefP9bpqwoQJqqurU1FRkdejXMC6/P5GsFwSE+Ms+MZl4UUdOnTw5Vb1rMvvXwTLRcEgwfqhpvYs9ALr8vtXjNcDtCTBoPTEE9KBA87ifnBuIq2urlarVs5/O2+66Sa1atXqwtf4+Hi1bt36wte4uDjFxMRc+BobG6uEhJ+pri5RbdpInTpJbdte/Nqxo9Su3cWvHTo4l+bnv57/50tdui7/1KlSmzYe/B+DkAiWi3r1kkaMcG5vIFjS5s2b9eSTT2rRokWaMmWKjD
EXHsf561//2uhrZWXlhePGGFVVVamhoUGlpaWqqwvo88+l+npnSer6emfRxLq6i19Pnw6/KcjnnzvPCF5q3jxn8cWcHOmpp0J/3/Ll0m9/2/jv/m9cRAnBctn52xt+9SuvJ/HW9u3bNX36dL344otatGiRa+97Plxnzjg39J45I/Xte/nrzq/LP29e+OcH58+X/uEfGv/d6NGRnxkXESyXBYPS0qVSVZV0001eT+ONjz/+WFOmTNEvf/lLV2MlXVzWuEuXpl87bZr0r//a9Lr8l2KdrOjiQ3eXjR0rxce33Nsb9u7dq7S0ND3++ONasmSJ1+M0iXX5/YVguax1a+lv//Zb7dz5odejuO6LL75Qenq6pk+fruXLl3s9zlVhXX5/YU13D+Tm5uq5555TaWmpL9ctj4avvvpK48ePV1pamv7whz9c+K0gcC0IlgdOnDihXr16adeuXRo2bJjX40TdkSNHlJKSolGjRmnlypWKieGjU1wf/jPnge7du2v06NHWbMt+I44dO6a0tDQNHz5cr7/+OrHCDSFYHvHruuWRdPz4cU2cOFF9+/bVqlWr1IY7MHGDuCT0yM6dO3XPPfeooqJCXbt29XqciPv2W+knP/mJzpw5orffflsdfng7OXAdOMPyyMiRI9WjRw/l5eWFPF5bW6uamhqXp4qMU6ekjAzp++9f0fr164kVIoZgeSQQCCg9PT3sZeF7772n+Ph4jRo1Ss8//7zeeustVVVVuTzltauulh580Lmb/I03blLs+Ts1gQjgktBDq1ev1lNPPaXy8vLLfs1fX1+vPXv2qLCwUEVFRcrLy9N3332n4cOHKykpScnJyUpLS1Pnzp29GT6E2lrn7vDSUun9951lWoBIIlgeqqqqUkJCgoqKijS6iYfQ6urqtGvXLuXn56ugoEDbtm1TdXW1Ro4cqfHjxyslJUXjxgUVH+/NSXNtrTRjhvTFF1J+vvOgNxBpBMtj48eP18SJE7V48eJr+r76+np9/PHHKigo0NatW/WXv5zQoUMf6PbbpeRkafJkadIkyY3P8+vrpUcflXbtkgoKpJtvjv57omUiWB7Lzs7WunXr9OGHN/aoTkODs85WUZG0aZNzSfbtt9KgQRcDNmGC1L17hAa/5H1nz3bOqgoKpH79IvvzgUsRLI/t3btXI0aM0NGjR9WjR4+I/dyGBmn/fick52Ny8qSz3VhqqjR+vHT//RdXL7gexkh///fOWuf5+U4cgWgiWD7Qr18/3XPPPZozZ46SkpLUqVOnqLxPSYlz9lVY6Ozgk59/Y2dEzz4rvfaaczZ3550RGxMIi2D5QHFxsZ5++mkVFhaqpqZGo0ePvvBBenJysuLi4rwe8TKLFkmvvipt3iyNHOn1NGgpCJaP1NfX67PPPlNRUZE2bdqkzZs3q6qqqtGtDJMmTfL8zvh/+ifpX/5FysuTxozxdBS0MATLx34YsC1btqiyslIDBw5UcnKyJk+erIkTJ6qbyzc87djh3MYwbpyrbwsQLJs0NDRo7969ys/PV35+vrZt26Zvv/1WGRl/1W23xWv8eCcikf5NIOAXBMtixhh9+umn2rr1TuXnB1RQIB0/7nwAPmGClJLi/LnaXz6mpjofxBcVSffdd/Hve/VydoeZPj0a/yuAq0ewmplLfxOYny8dPiz17+/ch5WU5EQpMTH096amOrdC3H57463aCRb8goefm5n+/aWsLOd2g0OHnJtJFy50trdatMjZ0uqOO6QFC6TXX5cqKhp/P7sew88IVjM3aJATsD/9yXkoubRUeukl59ivfy2tX9/49Zfuehxu41HAKwSrhbn5ZmnmTGdH45ISae7cy18zb56zu09OjvvzAVdCsFq4UBt/nt/1eMkSZzE+wC8IFkKaNs35rCvcrseAF9jCBGEtWyZNnOj1FMBFnGEhLHY9ht9wHxYAa3CGBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAG
sQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANf4/DZwHn7W0sTIAAAAASUVORK5CYII=\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE515828>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ms = MurckoScaffold.GetScaffoldForMol(mol=m_std)\n",
"print(Chem.MolToSmiles(ms))\n",
"Draw.MolToImage(ms)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sanitization with all operations (default `Chem.SANITIZE_ALL`)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[N]=c1[nH]cnc2[nH]cnc12\n",
"UnicodeEncodeError!\n"
]
}
],
"source": [
"ms_all = Chem.Mol(ms)\n",
"Chem.SanitizeMol(ms_all)\n",
"print(Chem.MolToSmiles(ms_all))\n",
"try:\n",
" Draw.MolToImage(ms_all)\n",
"except UnicodeEncodeError:\n",
" print(\"UnicodeEncodeError!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sanitization with all operations except FindRadicals"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N=c1[nH]cnc2[nH]cnc12\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAYAAAB5fY51AAAYiUlEQVR4nO3de3BUZZ7/8U9DuCeRW7i4EgpEQQQBuakJIVxi0kGBFRlnLcEBIWw56txckXJ+sli1tWGpZbR2t5zszkwNbkYQlhVGUakAkpCIlIAIOHhLEDAhBHAShERCkuf3x1kukW7Cpfuc8yTvVxUVi9NJf2eqfHtO55znCRhjjADAAq28HgAArhbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbBAmANggXAGgQLgDUIFgBrECwA1iBYAKxBsABYg2ABsAbB8pHUVCkQkD74oPHf9+olrVvnxUSAvxAsn+nWTXr2Wa+nAPyJYPnMvHnSwYPS2rVeTwL4D8HymdhYafFiadEi6dw5r6cB/IVg+dC8eVLr1lJOjteTAP5CsHwoJkbKzpaWLJFOnfJ6GsA/CJZPTZsm3XGHEy4AjhivB0B4y5ZJEyd6PQXgH5xh+djYsdKUKVJ1tdeTAP4QMMYYr4cAgKvBGRYAaxAsANYgWACsQbAAWINgWej556X/+A+vpwDcR7As1Lat9L//6/UUgPsIlg8ZIz32mPTRR6GPB4PStm1SVZW7cwFeI1g+FAhIR49Kb70V+vjYsVKXLtLmze7OBXiNYPlUMCi9807oY61aSWlp0rvvujsT4DWC5VOZmdLu3VJZWejjwaC0YYNz+Qi0FATLpwYPlvr1kzZuDH08GJQqKqRPPnF3LsBLBMvHMjLCX/Z17SqNGRP+shFojgiWjwWDzhlWuKWSg0E+x0LLQrB8bNIkJ1Y/3PbrvMxMaft26eRJd+cCvEKwfKxDB2n8+PBnUXffLfXsKW3a5O5cfsW+js0fwfK5K93eEAhI6elcFl6KfR2bN4Llcw88IO3bJx06FPr4+aA1NLg7l1+xr2PzRrB8rn9/6fbbpffeC308LU0aMmSddu3a6+5gPsW+js0bwbJAZmb4y77OnaX6+t/onXfedHUmP2Nfx+aLYFngwQdPqbp6nc6ePRvyeDAY1Lt8kHUB+zo2XwTLAklJ7bR9+ywVFBSEPD5lyhR99NFHqqiocHky/2Jfx+aJYFmgXbt2mjhxYtizqKFDh6pPnz7aGO45nhZq2TLplVek777zehJECsGyRDAY1DtXeA4nIyODy8IfYF/H5od9CS1x5MgRJSYm6ssvv9SAAQMuO75+/XrNmTNHFRUViolp/ht6nz4t/e530s9+5tyPhpaBMyxL9OnTR0OGDAl7FjV58mRVV1drx44dLk/mvoICadgw6fe/d1asuBY1Nc5D5Vu3RmU0RBnBskhmZmbYYHXq1EkpKSnN+rLw+++dDTgmTXJuqN2503k06Vq0by9NmOA8IfCP/8h6YrYhWBYJBoN6//33VR3mQ5mmPuey2Y4d0ogR0po1ztLQr7witWt37T8nEJAWLnSeLfy3f5OmT5cqKyM8LKKGYFkkOTlZ7du31/vvvx/yeGZmpvbs2aPS0lKXJ4uec+ekpUullBTnz969ztcbFQxKe/ZI5eXOumL79t34z0T0ESyLxMTEaPLkyWEv+wYOHKhbb7212V
wW7tvn/Kbv1VedR5NycqROnSL38/v0cT4PS0113uePf4zcz0Z0ECzLNHXZ1xzueq+rc86qRo2ShgxxwjVhQnTeq1076T//U/rtb6Unn5QWLJBqa6PzXrhxBMsymZmZ+vrrr3XgwIGQx3/0ox/prrvucnmqyPnLX6R775Vefln6n/+RXntNiouL/vvOni0VFUl5eVJysnT4cPTfE9eOYFmmV69eGjFixIWzrKNHj+qJJ57Q8OHD9fOf/1wnTpzQU0895fGU166+vl7Ll9dq5EjnkZoDB6QHH4zsezT1XOGIEc7mtd26OWd3LIzoPwTLQnPmzFHHjh114sQJpaWlqaSkRI8//ri++eYbLViwQAkJCbrrrrv0zDPPaO3atTp+/LjXI1/Rl19+qZSUFG3Z8pJWrXLOqjp3jux7fPSRs1RPU79E7dbNec2vfuXcJb90Kbc++IqBlSorK83IkSNNUlKSOX36dKNjxcXFJicnx8yaNcskJiYaSaZ///4mKyvLrFixwhw+fNijqRtraGgwOTk5JjY21sycOdMcP348qu+Xk2NM27bGLFxoTF1d06/fsMGYvn3rzdy5/8+cOnUqqrPh6vBojoXOnDmjjIwMnTt3Tnl5eYpr4kOekpISFRYWqqioSBs3btShQ4fUv39/JSUlKTk5Wenp6erbt69L0zsOHTqkuXPnavfu3Vq6dKmysrJced9t26RHHpEGDZJWrZJ69Ljy6w8e/EYzZkxVTU2N1q5dq8GDB7syJ8Lwupi4NtXV1SY1NdUMGzbMnDx58rp+RnFxsVmxYoXJysoy/fr1u3AGNmvWLJOTk2NKSkoiPHVjK1asMHFxcSYYDJpvvvkmqu8VyrFjxkyYYEyfPsZ8+GHTr6+pqTHz5883sbGxZuXKldEfEGERLIucPXvWZGZmmoEDB5ry8vKI/dzS0lKzevVqk5WVZQYPHmwkmd69e5uZM2eanJwcs3///oi8z9GjR83UqVNNfHy8ycnJicjPvF7nzjmXhu3bG/Pyy1f3PStWrDAdOnQwWVlZ5uzZs9EdECERLEvU1taaqVOnmgEDBpiysrKovldZWVnEA7Z69WrTrVs3k5aW5pvP0Iwx5s03jbnpJmNmzTLmzJmmX797927Tr18/k5SUZEpLS6M9Hn6AYFmgrq7O/PjHPzaJiYnm4MGDrr//0aNHzerVq80zzzxjRo4caQKBgOnVq5eZOXOmefnll83OnTtNQ0NDyO89duyYeeihh0zHjh1Ndna2qa+vd3n6pn32mTF33mnM8OHGFBc3/foTJ06YjIwMk5CQYDZt2hT9AXEBwfK5+npj5s79qbnllltM8dX82+SC8vLyywLWs2fPywK2Zs0ak5CQYO677z7zxRdfeD32FVVVGTNjhjEZGW+aDRs2NPn6hoYGk52dbdq2bWuys7PDBhuRRbB8rKHBmKwsY1JTj5vPPvvM63HCKi8vN2+88Yb56U9/au68804TCATMzTffbNq3b29+85vf+PKsKpSGBmOWLVtm2rZta1588cWrmnvDhg2mS5cuZtq0aaaystKFKVs2bmvwseeek/7rv6QtW5y7sG1x/Phx/eIXv1BZWZm2bNni9TjXbNu2bXrkkUc0aNAgrVq1Sj2auPfh8OHDevjhh1VVVaW1a9dqyJAhLk3a8nCnu0+98ILzUG5enl2xkqSEhATNnz9fH3zwgU6fPh3yNfv27dPTTz/t8mRXZ9y4cdqzZ48kadSoUU2u4pqYmKj8/Hzde++9CgaDquXp6ejx+hQPl3vpJWM6djSmoMDrSa7fuXPnTOfOnc1bb70V8nhxcbGRZA4dOuTyZFfv3LlzZuHChaZ9+/bm5au496GkpMQEAgHff15nM86wfOaVV6R//mfp7belceO8nub6xcTEKC0tLexSN/3799eAAQN8vTVZTEyMsrOztXLlSi1evFizZ88Ou9qrJOXm5mrMmDG67bbbXJyyZSFYPvL73zvL965ZE731n9wUDAb19t
tvhz1+pTXq/WT69OnasWOHdu/eraSkJJWUlIR83euvv67HHnvM5elaGK9P8eD44x+NadPGmNWrvZ4kco4dO2ZatWplPv3005DH3333XRMbG2u+//57lye7PlVVVWbGjBkmMTHxspl37NhhYmJiIvoEAi7HGZYPrF0rZWVJK1ZIM2d6PU3k9OjRo9HaXT804f9OIwsLC90c67rFx8drzZo12rhxo9r9YAeM3NxcBYNB9bzWbXxwTQiWx2pqpGefdX4j+Hd/5/U0kXely7527dopNTXVisvC8wKBgAYNGtTo7+rq6rR69WouB11AsDzWoYO0f7/0+ONeTxIdwWBQ27ZtU1VVVdjjtm9NtnHjRtXU1OjBSC+RissQLB+I5E4wfjN27Fh16dJFmzdvDnn8gQce0IEDB1RcXOzyZJGTm5urhx9+WB06dPB6lGaPYCGqWrVqdcXbGxITEzV48GC99957Lk8WGadOndKf//xnLgddQrAQdcFgUBs2bJAJ8xSYLbc3hLJ27Vp16dJFKZHY3RVNIliIumAwqIqKCn3yySdhj2/ZsuWKN2X6VW5urmbNmqXWrVt7PUqLQLAiKDVVCgSkDz5o/Pe9eknr1jV+3a9/ffn333KLlJsbxQE90rVrV40ZMybsh+vjxo1T27ZtlZ+f7/JkN6asrEz5+fl69NFHvR6lxSBYEdatm3ObAhq70o7Ubdq00aRJk6y7LFy//rgeeOAnGjp0qNejtBgEK8LmzZMOHnRuBsVFmZmZ2r59u06ePBnyeFOP8fjRq68OU0rK77weo0UhWBEWGystXiwtWiSdO+f1NP5x9913q2fPnsrLywt5fMqUKfr666/1+eefuzzZ9dmzR/r0U2fLMLiHYEXBvHlS69ZSTk741yxfLnXv3vjP0aPuzei2QCCg9PT0sJd9vXv31rBhw6y5iTQ3V0pLk/7mb7yepGUhWFEQEyNlZ0tLlkinToV+zfz5zn+lL/3T1Kaetjv/OVZDQ0PI47bc3tDQ4GzCyq1X7iNYUTJtmnTHHU64QomLc34reOmf5v6b8fT0dFVWVmrnzp0hjweDQeXn5+u7775zebJrs2mTVFkpTZ/u9SQtD8GKomXLnAX5fP7vn2vi4+N13333hb3su/feexUXF+f7deBzc6WHHnI+r4S7CFYUjR0rTZkiWXg/ZNRc6faG1q1bX/ExHj84c0Z6800uB73Crjlw1b59+zR8+HCVlZWFXDvqtdde0wsvvKDDhw8rEAh4MOGV5eY6uxkdOdL8L+H9iDMsuGro0KHq06dP2LXcg8GgysrKtH//fpcnuzq5udKjjxIrrxAsuC4jIyPsZV9CQoJGjRrly8vCigpp82YuB71EsOC6YDCojRs3qq6uLuxxPwbrT3+Sbr9dGj7c60laLoIF102ePFnV1dVhNyjNzMxUYWGhKisr3R2sCf/939Ls2V5P0bIRLLiuU6dOSklJCXsWNWrUKHXr1k2bNm1yebLwjJEWLOBy0GsEywUVFdK//7vXU/jLldZyb9Wqle6//35fXRYGAk6weBTHWwTLBbW10jPPSF995fUk/pGZmak9e/aotLQ05PHzQeOuG1yKYLngllukIUMkS57rdcXAgQN16623hl3LPT09Xd27d1d5ebnLk8HPCJZLMjMlH13h+MKVbm/o2rWr9u3bp969e7s8FfyMYLkkGJS2buUxnUsFg0Hl5eWptrbW61FgCYLlkqQkqX17yefP9bpqwoQJqqurU1FRkdejXMC6/P5GsFwSE+Ms+MZl4UUdOnTw5Vb1rMvvXwTLRcEgwfqhpvYs9ALr8vtXjNcDtCTBoPTEE9KBA87ifnBuIq2urlarVs5/O2+66Sa1atXqwtf4+Hi1bt36wte4uDjFxMRc+BobG6uEhJ+pri5RbdpInTpJbdte/Nqxo9Su3cWvHTo4l+bnv57/50tdui7/1KlSmzYe/B+DkAiWi3r1kkaMcG5vIFjS5s2b9eSTT2rRokWaMmWKjD
EXHsf561//2uhrZWXlhePGGFVVVamhoUGlpaWqqwvo88+l+npnSer6emfRxLq6i19Pnw6/KcjnnzvPCF5q3jxn8cWcHOmpp0J/3/Ll0m9/2/jv/m9cRAnBctn52xt+9SuvJ/HW9u3bNX36dL344otatGiRa+97Plxnzjg39J45I/Xte/nrzq/LP29e+OcH58+X/uEfGv/d6NGRnxkXESyXBYPS0qVSVZV0001eT+ONjz/+WFOmTNEvf/lLV2MlXVzWuEuXpl87bZr0r//a9Lr8l2KdrOjiQ3eXjR0rxce33Nsb9u7dq7S0ND3++ONasmSJ1+M0iXX5/YVguax1a+lv//Zb7dz5odejuO6LL75Qenq6pk+fruXLl3s9zlVhXX5/YU13D+Tm5uq5555TaWmpL9ctj4avvvpK48ePV1pamv7whz9c+K0gcC0IlgdOnDihXr16adeuXRo2bJjX40TdkSNHlJKSolGjRmnlypWKieGjU1wf/jPnge7du2v06NHWbMt+I44dO6a0tDQNHz5cr7/+OrHCDSFYHvHruuWRdPz4cU2cOFF9+/bVqlWr1IY7MHGDuCT0yM6dO3XPPfeooqJCXbt29XqciPv2W+knP/mJzpw5orffflsdfng7OXAdOMPyyMiRI9WjRw/l5eWFPF5bW6uamhqXp4qMU6ekjAzp++9f0fr164kVIoZgeSQQCCg9PT3sZeF7772n+Ph4jRo1Ss8//7zeeustVVVVuTzltauulh580Lmb/I03blLs+Ts1gQjgktBDq1ev1lNPPaXy8vLLfs1fX1+vPXv2qLCwUEVFRcrLy9N3332n4cOHKykpScnJyUpLS1Pnzp29GT6E2lrn7vDSUun9951lWoBIIlgeqqqqUkJCgoqKijS6iYfQ6urqtGvXLuXn56ugoEDbtm1TdXW1Ro4cqfHjxyslJUXjxgUVH+/NSXNtrTRjhvTFF1J+vvOgNxBpBMtj48eP18SJE7V48eJr+r76+np9/PHHKigo0NatW/WXv5zQoUMf6PbbpeRkafJkadIkyY3P8+vrpUcflXbtkgoKpJtvjv57omUiWB7Lzs7WunXr9OGHN/aoTkODs85WUZG0aZNzSfbtt9KgQRcDNmGC1L17hAa/5H1nz3bOqgoKpH79IvvzgUsRLI/t3btXI0aM0NGjR9WjR4+I/dyGBmn/fick52Ny8qSz3VhqqjR+vHT//RdXL7gexkh///fOWuf5+U4cgWgiWD7Qr18/3XPPPZozZ46SkpLUqVOnqLxPSYlz9lVY6Ozgk59/Y2dEzz4rvfaaczZ3550RGxMIi2D5QHFxsZ5++mkVFhaqpqZGo0ePvvBBenJysuLi4rwe8TKLFkmvvipt3iyNHOn1NGgpCJaP1NfX67PPPlNRUZE2bdqkzZs3q6qqqtGtDJMmTfL8zvh/+ifpX/5FysuTxozxdBS0MATLx34YsC1btqiyslIDBw5UcnKyJk+erIkTJ6qbyzc87djh3MYwbpyrbwsQLJs0NDRo7969ys/PV35+vrZt26Zvv/1WGRl/1W23xWv8eCcikf5NIOAXBMtixhh9+umn2rr1TuXnB1RQIB0/7nwAPmGClJLi/LnaXz6mpjofxBcVSffdd/Hve/VydoeZPj0a/yuAq0ewmplLfxOYny8dPiz17+/ch5WU5EQpMTH096amOrdC3H57463aCRb8goefm5n+/aWsLOd2g0OHnJtJFy50trdatMjZ0uqOO6QFC6TXX5cqKhp/P7sew88IVjM3aJATsD/9yXkoubRUeukl59ivfy2tX9/49Zfuehxu41HAKwSrhbn5ZmnmTGdH45ISae7cy18zb56zu09OjvvzAVdCsFq4UBt/nt/1eMkSZzE+wC8IFkKaNs35rCvcrseAF9jCBGEtWyZNnOj1FMBFnGEhLHY9ht9wHxYAa3CGBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAG
sQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANQgWAGsQLADWIFgArEGwAFiDYAGwBsECYA2CBcAaBAuANf4/DZwHn7W0sTIAAAAASUVORK5CYII=\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=300x300 at 0x7F9AFE52DD30>"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"options = Chem.SANITIZE_ALL ^ Chem.SANITIZE_FINDRADICALS\n",
"ms_norad = Chem.Mol(ms)\n",
"Chem.SanitizeMol(ms_norad, options)\n",
"print(Chem.MolToSmiles(ms_norad))\n",
"Draw.MolToImage(ms_norad)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment