Skip to content

Instantly share code, notes, and snippets.

@ptosco
Last active January 18, 2024 01:12
Show Gist options
  • Save ptosco/6d70cec235361fbaddc7cbc2cf9c3b5d to your computer and use it in GitHub Desktop.
Save ptosco/6d70cec235361fbaddc7cbc2cf9c3b5d to your computer and use it in GitHub Desktop.
UniquifyMatches
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"pattern='C~C~C(~C)~C'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"smiles='O[C@H]1C[C@H]2C([C@@]1(C)CC2)(C)C'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"pat = Chem.MolFromSmiles(pattern)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"mol = Chem.MolFromSmiles(smiles)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAAFqklEQVR4nO3cPWtU7RbH4ZVkJpPBgJVoMYiEiN/BWlBE/Aq2ClYiWtiIosRGRQSxEQRtBDsJNhb6CdKkCxjN4AspYhg1zkwmOc0pngPnFA8Hnnvvva6rSvkvAr9Zd16m9vf39wMAkpouPQAAShJCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQmpvPB7H58+fS88AakoIqb2lpaV4+PBh6RlATU3t7+/vlx4B/4/xeBztdrv0DKCmXITUXrvdjp2dnbhx40Zsb2+XngPUjBDSCLOzs9Fut2Nvb6/0FKBmPI3SOKPRKGZnZ0vPAGrCRUijPHjwIC5dulR6BlAjLkIaZXNzM+bn56Pb7ZaeAtSEi5BGOXToUMzNzcXr169jPB6XngPUgBDSOL9+/Yrnz5/H9+/fS08BasDTKACpuQhprJcvX8b169dLzwAqzkVIY62urka3242FhYXSU4AKE0Iab21tLRYXF0vPACrK0yiNNhgM4vTp07G+vl56ClBRLkIab3d3N1qtVukZQEW5CGm8VqsV79+/j2fPnpWeAlSQEJLCzs5OHDlypPQMoII8jZLKcDiMTqdTegZQIS5C0hgMBnH8+HG/OAP8BxchqfT7/ej1eqVnABXiIiSVXq8XGxsb8eHDh9JTgIoQQtJ58+ZNrKyslJ4BVISnUQBScxGS0mAwiFOnTsWnT59KTwEKcxGS1vLycpw5cyamp30ehMyEkNRGo1F8+fIljh07VnoKUIiPwqR27969ePz4cekZQEEuQlKbTCYxMzNTegZQkIuQ1GZmZuL3799x7dq12NraKj0HKEAISa/T6cTBgwdjamqq9BSgAE+j8Bd//vyJubm50jOAf5CLEP7t1q1bcfHixXj37l28evUqIqJSX/f7/bh9+3ZEROW+hjoTQgBS8zQKf+FpFPJxEZLeZDKJO3fuxI8fP0QQEhJC0hsOh7G9vR0eRyAnT6Ok5g/qARchqd29ezeuXr1aegZQkIuQ1PzTbcBFSFrLy8vRarVEEJITQlIaDAZx//792NjYKD0FKMzTKACpuQhJ58mTJ/Ho0aPSM4CKaJUeAP+0c+fOxcePH0vPACrC0yip9Pv96PV6pWcAFeJplDQGg0GcPHky1tfXS08BKsRFSCrD4TA6nU7pGUCF+BkhKbx9+zb29vbi7NmzpacAFeNplBS63W58+/at9AyggjyN0ni7u7vRann8AP47FyGNNhgM4sSJE35BBvifXIQ03traWiwuLpaeAVSUENJYq6ur0e12Y2FhofQUoMI8jdJYKysr8fTp09IzgIpzEQKQmouQxvn582ecP38++v1+6SlADQghjXPgwIG4cOFCHD58uPQUoAY8jdIom5ubMT8/H91ut/QUoCZchDTKixcv4vLly6VnADXiIqRxRqNRzM7Olp4B1ISLkEaYTCZx8+bN2NraEkHgbxFCGmE0GsV4PI7pad/SwN/jaZTaG4/H0W63S88AasrHZ2pvaWkprly5UnoGUFMuQmpvPB7H169f4+jRo6WnADUkhACk5mkUgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDU/gUHm+
abMjOBjwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fd483507f80>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pat"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAAbbElEQVR4nO3de1SUZR4H8C+IMGCKgKKCIloibhdv4I0JzTJaw9BSywuaRxQ9lp4ubrtWQuesncrOBqYlijew3FBi00xNXB1voIC6SopohcglMYckQG4zv/2DZVZSkcvMvAPz/ZzDUYeZ5/2OJ/vyvM/zvmMjIgIiIiIrZat0ACIiIiWxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCImIyKqxCIlaERHBV199hZiYGFRVVSkdh6hNsBERUToEEd1feXk5Zs2ahUOHDkGlUqFTp074/PPPMXr0aKWjEbVqnBEStQKFhYUYM2YMsrKykJaWhuzsbEydOhVBQUGYNWsWrl+/rnREolaLRUhk4c6ePYsRI0bAxcUFx44dQ58+feDk5ITIyEicPHkSly9fRv/+/bFu3TrwBA9R07EIiSzYd999B7VajWeeeQa7d++Gs7Nzve8/9thjOHbsGKKiorBs2TKMHj0a58+fVygtUevEIiSyUNHR0Zg0aRJWrFiBmJgY2NnZ3fV5NjY2mDVrFjIzM+Ht7Y1BgwZhyZIlKCsrM3NiotaJm2WILExVVRXCw8ORmJiIbdu24dlnn23S6w8dOoSFCxeiuroaa9asQVBQkImSErUNnBESWRCtVougoCAcPnwYqampTS5BABgzZgxOnTqFmTNnIiQkBBMmTEB+fr4J0hK1DSxCIgtx+fJljBo1ClVVVUhJScGf/vSnZo/l6OiIyMhInDt3Drdu3cIjjzyC6Oho6PV6IyYmahtYhEQWIDk5Gf7+/hg8eDAOHDgAd3d3o4zbr18/7N+/H9HR0VixYgWGDRuGjIwMo4xN1FawCKlV0ev1bW5Ws379ejz77LNYsmQJtm3bBpVKZdTx6zbTXLx4EUOHDsXIkSOxZMkSlJaWGvU4RK0Vi5BalZSUFPTu3RuRkZEoKipSOk6L6HQ6/PWvf8XixYuxefNmREZGmvR4Li4uiImJQXJyMpKTk+Hr64uvv/7apMckag1YhNSqaDQa5OXl4b333kOvXr0wdepUJCcnt7oLyUtLSzFp0iTEx8fjyJEjmDZtmtmOHRgYiDNnzuDVV1/FjBkzMGHCBFy9etVsxyeyNCxCalU0Go3h91VVVdi+fTvGjRuHAQMG4MMPP4RWq1UwXePk5eUhMDAQubm5SElJgZ+fn9kztG/fHm+99RYyMzNRVVVl+PvT6XRmz0KkNF5HSK1GTU0NXF1d8fvvv9/zOSqVClOmTMFrr72GwYMHmzFd46SmpmLixIkICAhAfHw8nJyclI4EANi+fTteeeUVeHp6Yu3atRg2bJjSkYjMhjNCajXS09MbLEEAqKioQHx8PIYMGQI/Pz+sW7fOYu6wkpCQgLFjx+LFF1/E9u3bLaYEAWDKlCm4ePEiHn/8cQQEBCA8PBwlJSVKxyIyCxYhtRq3nxZtjIyMDISHh8PT0xPh4eHIzMw0UbKGiQg+/PBDzJ49G5999hmio6Nha2t5//Q6d+6M6OhoHDp0CMePH4evry/i4uKUjkVkcjw1Sq3G+PHjsWfPnhaNERAQgCVLlmDixIlo3769kZLdW0VFBcLCwrB3717s2LEDY8aMMfkxjaGmpgZr1qzBO++8gzFjxmD16tXo3bu30rGITIJFSK2CTqeDm5sbbt68aZTxunfvjtmzZ2PBggXw9vY2yph/VFhYiIkTJ6K4uBjffvstfHx8THIcU/r555+xaNEiaDQaLF26FMuWLYO9vb3SsYiMikVIrUJaWppJNnDY2tpi7NixmD9/Pp5//nm0a9fOKOOeO3cOEyZMgI
+PDxISEtC5c2ejjKuUXbt2YdGiRXBxccHatWsxcuRIpSMRGY3lLVQQ3UVT1wcbS6/XIzk5GVOnTjXahfp79uyBWq1GUFAQdu/e3epLEAAmTJiAc+fO4fHHH0dgYCCioqKUjkRkNCxCahWuXLli8kLJz8/He++9By8vL8yYMQNHjhxp8hjR0dGYOHEili1bhpiYGLOsQ5qLs7MzVq9ejZUrV2LVqlVKxyEyGp4apValsrISN27cQGFhIQoKCgy/FhcX13ssNzcXNTU1LT5e//79MWfOHMybNw+urq73fF5NTQ2WLFmC+Ph4fPnllwgODm7xsS3VW2+9hQsXLmDnzp1KRyEyChYhtVnFxcUNlmVBQQHy8/MbtQGnoQv1tVotJk+ejJycHOzatQsPP/ywqd6SRRgxYgSmTJmCN954Q+koREbBIiSrd+vWrQbLsu7Xa9euQa/XY+jQoZg/fz5mzJiBwsJCTJgwAS4uLkhKSkK3bt2UfjsmVVpaCldXVxw/flyRW8MRmQKLkKiRKisrce3aNeTn56OoqAjl5eX45JNP4OTkhH379sHBwUHpiCa3b98+TJ06FVqt1mg7bImUZqd0AKLWwsHBAV5eXvDy8gIAZGVlIS0tDWq12ipKEKjdvatWq1mC1KZw1yhRM2k0GvTr1w/Hjx/H+fPnlY5jFhqNBqNHj1Y6BpFRsQiJmkmj0WDcuHF44oknsGHDBqXjmFx5eTnS09NZhNTmsAiJmunw4cMYPXo0wsPDsXHjRpSXlysdyaSOHz8Oe3t7DBkyROkoREbFIiRqhkuXLiE/Px9qtRqTJk2Ck5MTduzYoXQsk9JoNAgICGhTNwkgAliERM2i0Wjg6+sLDw8P2NnZYc6cOVi7dq3SsUyK64PUVrEIiZrhj6Uwf/58nDx5EqdPn1YwlelUVFQgLS2NRUhtEouQqBnq1gfreHl54ZlnnsH69esVTGU6KSkpsLW15UX01CaxCIma6KeffkJubi4CAwPrPb5gwQLEx8ejpKREoWSmo9FoMGrUKH4WIbVJLEKiJqq7ftDT07Pe4+PHj0eXLl2wbds2hZKZDtcHqS1jERI10b1KwdbWFnPnzsXnn3+uQCrTqaysRGpqKouQ2iwWIVETNTQ7CgsLw/nz53HixAkzpzKduvfi7++vcBIi02AREjXB1atXkZOTc8f6YJ3u3bsjJCQEMTExZk5mOhqNBiNHjoRKpVI6CpFJsAiJmuDgwYPo27ev4cbbdxMeHo5//vOf0Gq1ZkxmOlwfpLaORUjUBI0phSeffBK9evVCfHy8mVKZTlVVFVJSUliE1KaxCImaoDFFaGNjg3nz5mHt2rVo7R/3mZaWBp1Oh+HDhysdhchkWIREjZSXl4cff/yxUbOjOXPmICcnB4cPHzZDMtPRaDQYPnw4HB0dlY5CZDIsQqJG0mg06NWrF7y9ve/7XDc3N0yePLnVb5rh+iBZAxYhUSNpNBo88cQTjX5+eHg4EhMTUVRUZMJUplNTU8P1QbIKLEKiRmrq7EitVqN///7YtGmTCVOZTnp6OiorKzFy5EiloxCZFIuQqBEKCwuRnZ3d5NnR/PnzsW7dOuj1ehMlMx2NRgN/f384OTkpHYXIpFiERI2g0WjQs2dPPPjgg0163ezZs3H9+nXs37/fRMlMh+uDZC1YhESN0NxS6NixI1588cVWt2mmpqYGx44dYxGSVWAREjXCoUOHml0KixYtwq5du5CXl2fkVKZz+vRplJeXc32QrAKLkOg+ioqKcPHixWYX4aBBgzBo0CDExsYaOZnpaDQa+Pn5oWPHjkpHITI5FiHRfWg0GnTv3h0+Pj7NHmPBggVYt24dqqurjZjMdLg+SNaERUh0H8YohenTp6OiogK7d+82UirT0el0OHr0KIuQrAaLkOg+WrI+WMfR0REzZ85sFZtm/vOf/6C0tBQBAQFKRyEyCxYhUQNu3LiBCxcuYMyYMS0ea+HChfj+++9x+fLllgczoUOHDm
Hw4MHo1KmT0lGIzIJFSNQAjUaDLl26oH///i0ea8CAAQgICMCGDRuMkMx0uD5I1oZFSNSAulKwsbExynjh4eGIjY1FZWWlUcYzNr1ez+sHyeqwCIkaYIz1wdtNmTIFtra2SEpKMtqYLVFaWorc3FycOXMGBw4cwIoVK/Dbb79BrVYrHY3IbOyUDkBkqbRaLTIzM42yPljH3t4es2fPRkxMDF566SWjjVteXg6tVmv4unHjhuHXut/f/njd1+0z044dO6KiogIzZsxA586djZaNyNLZSGv/CG0iE/nmm28wd+5cFBUVwdbWeCdPfvzxR/j4+ODs2bN4+OGH7/h+cXExCgoKUFxc3KivwsJCFBcXG16vUqng4uLS4JeHhwd69Ohh+PPZs2cxffp0BAUFYePGjVCpVCgqKkK3bt2M9r6JLBWLkOgeXn/9dVy5cgWJiYlGHVev12PgwIFwcHCAs7NzvVlaWVmZ4XkdOnSAq6srXF1d4ebmBldXV3Tp0sXw2O2P3/7n9u3bNylPbGwsFi1ahL/97W+IiIiAjY0N4uLi8O233yIhIcGo753IErEIie5hyJAhePnll7F48WKjjZmSkoLFixcjMzMTXl5eCAsLu2ehOTg4GO24d6PT6fD2228jOjoaGzduxLRp0wzfq6mpQXV1NRwdHXHmzBkMGjTIpFmIlMQ1QqK70Ov1cHJyQkFBAXQ6Hdq1a9ei8QoKCvDee+8ZCsfX1xcdOnTA0qVLjZS4aUpLSzFjxgykpaXh8OHD8Pf3r/d9Ozs7tGvXDu+//z5iYmJw6tQpuLm5KZKVyNS4a5Ss3o0bN/D7778DAF577TWkpqbC1tYWb7zxBrZs2YJhw4YhLS2tWWNXVVUhOjoavr6+yM7ORnp6OuLi4lBRUQEXFxdjvo1Gy8vLQ2BgIHJycpCamnpHCdaprKzEzz//jNTUVLi5uSEjIwNXrlwxc1oi02MRklWpWwlISkrCyZMnAQBvvPGG4ZMhpk2bhoceeggAMGnSJGRlZUGtVmPUqFEIDw9HSUlJo4+1a9cuDBgwAB9//DFWr16NgwcPYuDAgQBqN8QoUYSpqanw8/NDjx49cPToUXh5ed3zuSqVCuvXr0ePHj2QmJiI8ePHIzs724xpicyDRUhtWm5uLrKysgAAH330ET744AMAQGFhIT7++GMAwLx583DgwAEAwLBhw9ClSxfD652dnREdHY0TJ07g1KlT8PX1RVxcXIPHzMrKwp///Ge89NJLCA0NRXZ2NmbNmlXvOVqt1uxFmJCQgLFjx+LFF1/Erl27Gv0RS+vXr8frr7+Offv2Ydy4ccjPz8fGjRtNnJbIjISojSguLhYRkf3798vq1atFRCQ2Nlaee+45ERFJT08Xb29v0el0cvPmTXFxcZHCwkIREdHr9fcdv7q6WqKioqRjx44yfvx4+emnn+p9X6vVyuLFi8XOzk6Cg4MlJyfnnmN5e3vLjh07mvM2m0yv18sHH3wgKpVKNm3a1OTXX758WQoKCkSk9u+wV69e8tFHHxk5JZFyWITUal25ckW+/vprERFJSkqScePGiYjIDz/8IB4eHlJdXS1lZWXi5uYmubm5IiLi7+8vu3fvFpHakvz555+bfNy8vDx54YUXxNHRUSIiIuTWrVuyZcsW6dq1qwwZMkQOHz583zE6deokBw4caPKxm6qiokJmzpwprq6ucvDgwRaNlZiYKO7u7pKUlCQiImVlZfLqq6/Kr7/+2vKgRApiEZLFq6mpkXPnzomISGZmpjz//PMiUjtTcXd3l4qKCqmoqBB3d3e5dOmSiIgEBgYaZlyvvvqqvPvuuyJSW351r2+pnTt3iru7u6hUKnF2dpaoqCipqalp1PuxsbGR06dPGyXHvVy/fl3UarX069dPsrKyWjTWtWvXxNfX15A5Ly9Phg4dKnPmzJHKykojpCVSDouQLNq0adMkPj5e3NzcpLS0VKqrq8XT01MyMzNFROTpp5+WrVu3iojI0q
VLZenSpSIi8sUXXxhmiJmZmeLh4SFVVVVSXl4uN27caHGu3NxcCQ0Nlfbt28vQoUOlffv2EhoaKtevX7/va69fvy4AGjx12lJnz56V3r17y1NPPWU4ZdxSdSWfkZEhvXr1kg8++EBEak+9Llu2TOLi4oxyHCJzYxGSRfv000/lpZdekpCQEFm/fr2IiCxfvlxeeeUVERH5+uuvRa1Wi0jDM8S5c+fKjz/+2OI8ZWVlEhERISqVSp566ilDIZ8+fVqGDx8urq6uEhMT0+CaY3Z2tgCQmzdvtjjP3ezZs0c6deok8+bNk6qqKqOOfbfToy+88IIEBgYafgjQ6XRGPSaRqbEIyaL99ttv4uLiIps2bRJ/f38REbl69eodM8S6U6fjxo2TL774QkTqzxBbSq/XS0JCgvTu3Vv69esnu3btuuM5Op1OYmJipFOnTjJ69Gi5cOHCXcc6ceKE2NnZNWqDTlNFRUWJvb29YbZmbHFxcXLq1CkREcnPzxc/Pz95+eWXDadH4+Pj5emnnzbJsYlMhUVIxqfLExnrJPJORv3Hq46K9HES2Vpy24PVIskrRcYOEOmoEunQVSRgusjOy4ZnzJ07V1asWCF9+vSRtLQ0ERF57rnnJDY2VkTqzxATExPl8ccfF5Hawjx58mSL305GRoao1Wrp0KGDRERESEVFRYPPLygokNDQULG3t5e33nrrjufv2bNHunTp0uJct6uurpZFixbJAw88IDt37jTq2Hdzt9Ojb7/9tjz44IPyww8/iIhISUlJQ0MQWQwWIRlfo4tQJ7IjVOSBfiKfJItoK0RKrojEhok49xCJrS3DtLQ06dOnj/z973+XsLAwERH57rvvxM/PT0Rq1+tcXV0NM0QfHx/DZREt8euvvxouhwgNDZVffvmlSa//9ttvxdvbWx566CHZv3+/4fEvv/xSfHx8WpyvjlarlbFjx0rPnj0NszVTOn/+vLi7uxt27JaVlcnkyZNFrVYbTo8eO3ZMPD0977jEhMgSsQjJ+BpbhBUakT4qkeV//J93jcgngSLdZooU154+HDp0qGzevFlcXFzkt99+E51OJ3369JH09HQRqT9DbMzOzYZUVVVJVFSUODs7i7+/v6SkpDR7rPLycomIiBB7e3uZMmWKFBUVyerVq2X48OEtyljn0qVL4uvrKyNGjGhyUTeXXq+Xy5drf0jJz88Xf39/mTVrlmHmu3XrVnF3d5e9e/eKiMjNmzd5iQVZNN5ZhpTzw36g8E/A1Mf+8I12wOSpwM1/AxnVAIAFCxYgKSkJ48aNw9atW2Fra4t58+Zh7dq1AIC//OUvcHV1rX11C26QnZycjEGDBuGjjz7CqlWrcOLECYwYMaLZ4zk6OiIyMhLp6enIy8tD//798d133xnlrjJHjx7FqFGjMHDgQPz73/8222cH2tjY4MEHH8Tp06cxcuRITJo0CVu2bIG9vT3effddLF++HAcPHkRQUBBycnIQEBCArVu34qeffjJLPqImU7qJqQ3S5YmMdRBx6izi5nbbl7NIO9X/Z4T7w0Ucg0Su3WXTyK0EEUdHka/KRUSktLRU3NzcJC4uTh555BEREfnll1/ExcXFKGtRFy9elODgYLG3t5fFixebZH1Lr9fLli1bRKVSibu7u2HHaXPExsaKvb29REREmGTTTWPMnDlTEhMTRaR25jtlyhRRq9VSVFQkIrWnRz08POT999+X4OBgmTFjhiI5ie6HM0IykXZA2DbgzJn/f6VvAjxv+0/OtSsg14Hr+jtf/msRIC6AW+3srkOHDpg2bRouX76MmpoaHD16FN26dUNGRkaj75l5N6WlpYiMjMRjj9XOSrOyshAdHd2iMe/FxsYGs2bNwsSJE+Hi4oLBgwdjyZIl9T6M935EBJGRkXjllVewadMmREZGwsbGxuhZGyM+Ph7PP/88CgsLMWbMGNja2uL7779H165dkZCQgEmTJmH58uXYsWMHevTogU2bNimSk+i+lG5iao
NMsEYoInLu3Dnx9PSU1atXN+uemfUi6nSyZcsW6datm/j6+hrWs8whJCRE3n33XTl48KD4+vpK3759G3X833//XUJCQqRHjx5y4sQJMyS9v8zMTPHy8pIVK1aISO2s95133pG+ffvKhg0bxMPDQ6KiohROSdQwFiEZX1N2jW7/367RqAMixZUiJbkiG8JEnLuLrL90x9Bvvvlmi3eEXrt2TYYMGSJubm7y2WeftXhzTVMFBgbKP/7xDxERuXXrlkRERIiDg4MEBwfL1atX7/qavLw8GTJkiDz22GMmvSNNUxUWFsq//vUvEak9PTp16lRRq9WyatUqcXd3N9zXlciS8dQoKcgWmLwBSJoHfLMI6NkR6DYY2FAGbDkChD10xytWrlyJ7t27t+ioXbt2xfTp05GdnY2FCxe2+NPnm+r2zyJUqVSIjIzEuXPnUFFRgUcffRTR0dHQ6XSG5584cQJ+fn7o1q0bjhw5gt69e5s1b0O6d++OkJAQw+lRAHjyySexcuVK7N27F+PHj1c2IFEj2Ij875NKicgsevbsiTVr1iAkJKTe4yKC+Ph4vPnmm+jVqxdiYmKQk5OD2bNnIywsDJ988glsbS3vZ1cRwdChQxEcHIzs7GxcvXoVSUlJcHd3VzoaUaNY3r8qojauuLjYcKnH7eo205w/fx6PPvooAgICEBoaik8//RTR0dEWWYJAbe6vvvoK33//Pezs7HDgwIH6JSilOLN9FTYfu4Z626L0+TgUuwrfXKgyd2SieuyUDkBkTSorK1FeXt7gdYRdunTB5s2bsWzZMjzwwAPw8PAwY8Lm8fLyQnh4OF5++WXFdrESNReLkMiMiouLAeCuM8I/8vHxMXUco3FwcMCcOXOUjkHULJZ5roWojdJqtQBglDvLEJFxcEZIZEbFxcVQqVRwdHRUOoqZ6VGcsR1r/2Nb77HqCoG3UpGI/odFSGRGt186YV1s0fnRYLzg5wbDCqL8gtSv9qBcyVhEYBESmZVWq7XSIgRs7B3xQMeO/1+P0ZegPffVkAXgGiGRGd3r0gkiUg6LkMiMrPfUKJHl4p1liMxo8eLFKCkpwebNm5WOQkT/wxkhkRlxRkhkeViERGbEIiSyPCxCIjOy5l2jRJaKRUhkRtw1SmR5WIREZsRTo0SWh0VIZEYsQiLLwyIkMpPS0lJUVVXx1CiRheF1hERmIiLIzc2Fp6cn7Ox4d0MiS8EiJCIiq8ZTo0REZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNVYhEREZNX+C8seohfxhaDSAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fd4835140d0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mol"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"matches_uniquified = mol.GetSubstructMatches(pat, uniquify=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"35"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(matches_uniquified)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((1, 2, 3, 4, 8),\n",
" (1, 5, 4, 3, 9),\n",
" (1, 5, 4, 3, 10),\n",
" (1, 5, 4, 9, 10),\n",
" (2, 1, 5, 4, 6))"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"matches_uniquified[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"No two of the uniquified tuples contain the same set of atom indices. In fact, the set of sorted tuples has the same length as the original tuple of tuples:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len({tuple(sorted(m)) for m in matches_uniquified}) == len(matches_uniquified)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"matches = mol.GetSubstructMatches(pat, uniquify=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With `uniquify=False` we get twice as many matches:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"70"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(matches)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"However, some of them actually contain the same indices, only as a different permutation:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((1, 2, 3, 4, 8),\n",
" (1, 2, 3, 8, 4),\n",
" (1, 5, 4, 3, 9),\n",
" (1, 5, 4, 3, 10),\n",
" (1, 5, 4, 9, 3))"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"matches[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we define a uniquifying function that removes tuples containing the same indices, just in a different order, and apply it to the non-uniquified matches, we get the same result as when we called `GetSubstructMatches(uniquify=True)`:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def uniquify(matches):\n",
" res = []\n",
" seen = set()\n",
" for m in matches:\n",
" # sort the tuple\n",
" s = tuple(sorted(m))\n",
" # have we already seen this sorted tuple before?\n",
" # If so, skip it, otherwise add it to the\n",
" # uniquified result\n",
" if (s in seen):\n",
" continue\n",
" else:\n",
" res.append(m)\n",
" seen.add(s)\n",
" return tuple(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Our uniquifying function returns the same result as `GetSubstructMatches(uniquify=True)`:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"uniquify(matches) == matches_uniquified"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment