Skip to content

Instantly share code, notes, and snippets.

@ptosco
Last active January 18, 2024 01:12
Show Gist options
  • Save ptosco/dc4d27153e6e8e45aed654761e4d7409 to your computer and use it in GitHub Desktop.
Save ptosco/dc4d27153e6e8e45aed654761e4d7409 to your computer and use it in GitHub Desktop.
DistinctMatches
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem import rdFMCS, rdDepictor"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"pattern='C~C~C(~C)~C'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"borneol = Chem.MolFromSmiles(\"O[C@H]1C[C@H]2C([C@@]1(C)CC2)(C)C\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"squalene = Chem.MolFromSmiles(\"CC(=CCC/C(=C/CC/C(=C/CC/C=C(/CC/C=C(/CCC=C(C)C)\\C)\\C)/C)/C)C\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"sesquiterpene = Chem.MolFromSmiles(\n",
" \"CC(=O)O[C@H]1CC[C@]2([C@H](C1(C)C)CC=C([C@@H]2CC/C(=C/C(=O)O)/C)C)C\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"triterpene = Chem.MolFromSmiles(\n",
" \"OCC12CCC(C2C2C(CC1)(C)C1(C)CCC3C(C1CC2)(C)CCC(C3(C)C)O)C(=C)C\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"pat = Chem.MolFromSmiles(pattern)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAAFqklEQVR4nO3cPWtU7RbH4ZVkJpPBgJVoMYiEiN/BWlBE/Aq2ClYiWtiIosRGRQSxEQRtBDsJNhb6CdKkCxjN4AspYhg1zkwmOc0pngPnFA8Hnnvvva6rSvkvAr9Zd16m9vf39wMAkpouPQAAShJCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQgBSE0IAUhNCAFITQmpvPB7H58+fS88AakoIqb2lpaV4+PBh6RlATU3t7+/vlx4B/4/xeBztdrv0DKCmXITUXrvdjp2dnbhx40Zsb2+XngPUjBDSCLOzs9Fut2Nvb6/0FKBmPI3SOKPRKGZnZ0vPAGrCRUijPHjwIC5dulR6BlAjLkIaZXNzM+bn56Pb7ZaeAtSEi5BGOXToUMzNzcXr169jPB6XngPUgBDSOL9+/Yrnz5/H9+/fS08BasDTKACpuQhprJcvX8b169dLzwAqzkVIY62urka3242FhYXSU4AKE0Iab21tLRYXF0vPACrK0yiNNhgM4vTp07G+vl56ClBRLkIab3d3N1qtVukZQEW5CGm8VqsV79+/j2fPnpWeAlSQEJLCzs5OHDlypPQMoII8jZLKcDiMTqdTegZQIS5C0hgMBnH8+HG/OAP8BxchqfT7/ej1eqVnABXiIiSVXq8XGxsb8eHDh9JTgIoQQtJ58+ZNrKyslJ4BVISnUQBScxGS0mAwiFOnTsWnT59KTwEKcxGS1vLycpw5cyamp30ehMyEkNRGo1F8+fIljh07VnoKUIiPwqR27969ePz4cekZQEEuQlKbTCYxMzNTegZQkIuQ1GZmZuL3799x7dq12NraKj0HKEAISa/T6cTBgwdjamqq9BSgAE+j8Bd//vyJubm50jOAf5CLEP7t1q1bcfHixXj37l28evUqIqJSX/f7/bh9+3ZEROW+hjoTQgBS8zQKf+FpFPJxEZLeZDKJO3fuxI8fP0QQEhJC0hsOh7G9vR0eRyAnT6Ok5g/qARchqd29ezeuXr1aegZQkIuQ1PzTbcBFSFrLy8vRarVEEJITQlIaDAZx//792NjYKD0FKMzTKACpuQhJ58mTJ/Ho0aPSM4CKaJUeAP+0c+fOxcePH0vPACrC0yip9Pv96PV6pWcAFeJplDQGg0GcPHky1tfXS08BKsRFSCrD4TA6nU7pGUCF+BkhKbx9+zb29vbi7NmzpacAFeNplBS63W58+/at9AyggjyN0ni7u7vRann8AP47FyGNNhgM4sSJE35BBvifXIQ03traWiwuLpaeAVSUENJYq6ur0e12Y2FhofQUoMI8jdJYKysr8fTp09IzgIpzEQKQmouQxvn582ecP38++v1+6SlADQghjXPgwIG4cOFCHD58uPQUoAY8jdIom5ubMT8/H91ut/QUoCZchDTKixcv4vLly6VnADXiIqRxRqNRzM7Olp4B1ISLkEaYTCZx8+bN2NraEkHgbxFCGmE0GsV4PI7pad/SwN/jaZTaG4/H0W63S88AasrHZ2pvaWkprly5UnoGUFMuQmpvPB7H169f4+jRo6WnADUkhACk5mkUgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDUhBCA1IQQgNSEEIDU/gUHm+
abMjOBjwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f98e72d7990>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pat"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def get_distinct_matches(mol, pat):\n",
"    \"\"\"Return mutually disjoint matches of pat in mol found by a greedy search.\n",
"\n",
"    Each match in turn seeds an attempt. An attempt repeatedly places the\n",
"    candidate match that shares no atom with the matches placed so far and\n",
"    has the most bonds to them; candidates tied for that top score are\n",
"    queued as alternative attempts. The largest set of placed matches seen\n",
"    is returned.\n",
"\n",
"    Args:\n",
"        mol: RDKit molecule to search.\n",
"        pat: RDKit molecule used as the substructure pattern.\n",
"\n",
"    Returns:\n",
"        If pat matches fewer than twice, the GetSubstructMatches result as\n",
"        is. Otherwise a sorted tuple of matches, each one a sorted tuple of\n",
"        atom indices.\n",
"    \"\"\"\n",
"    from collections import deque\n",
"\n",
"    class Attempt:\n",
"        \"\"\"One branch of the search: open candidates plus matches placed.\"\"\"\n",
"\n",
"        def __init__(self, cand_list, distinct_matches_set):\n",
"            self.cand_list = cand_list\n",
"            self.distinct_matches_set = distinct_matches_set\n",
"            self.placed_idx_set = {\n",
"                idx for m in distinct_matches_set for idx in m}\n",
"\n",
"    def _as_sorted_tuples(match_set):\n",
"        # Canonical output form: sorted atom indices, sorted matches.\n",
"        return tuple(sorted(tuple(sorted(m)) for m in match_set))\n",
"\n",
"    matches_uniquified = mol.GetSubstructMatches(pat, uniquify=True)\n",
"    if len(matches_uniquified) < 2:\n",
"        return matches_uniquified\n",
"    matches_idx_set = {idx for m in matches_uniquified for idx in m}\n",
"    # degree[a]: number of bonds of atom a whose two ends are both matched.\n",
"    degree = [0] * mol.GetNumAtoms()\n",
"    for b in mol.GetBonds():\n",
"        bi = b.GetBeginAtomIdx()\n",
"        ei = b.GetEndAtomIdx()\n",
"        if bi in matches_idx_set and ei in matches_idx_set:\n",
"            degree[bi] += 1\n",
"            degree[ei] += 1\n",
"    # Primers are tried in ascending order of summed atom degree.\n",
"    matches_uniquified_sorted = sorted(\n",
"        (sum(degree[i] for i in m), frozenset(m))\n",
"        for m in matches_uniquified)\n",
"    # No more disjoint matches than this fit on the matched atoms.\n",
"    max_distinct_matches = len(matches_idx_set) // len(matches_uniquified[0])\n",
"    adj_mat = Chem.GetAdjacencyMatrix(mol)\n",
"    largest_distinct_matches_set = set()\n",
"    largest_distinct_matches_num = 0\n",
"    for i, (_, primer) in enumerate(matches_uniquified_sorted):\n",
"        cand_list = list(matches_uniquified_sorted)\n",
"        cand_list.pop(i)\n",
"        attempts = deque([Attempt(cand_list, {primer})])\n",
"        while attempts:\n",
"            attempt = attempts.popleft()\n",
"            while True:\n",
"                # Tally at the top of every pass, so the primer on its own\n",
"                # is counted as well as each later placement.\n",
"                # NOTE(review): this makes a primer with no placeable\n",
"                # neighbour yield one match instead of an empty result;\n",
"                # confirm that is the wanted outcome.\n",
"                n_distinct_matches = len(attempt.distinct_matches_set)\n",
"                if n_distinct_matches > largest_distinct_matches_num:\n",
"                    largest_distinct_matches_num = n_distinct_matches\n",
"                    largest_distinct_matches_set = attempt.distinct_matches_set\n",
"                    if largest_distinct_matches_num == max_distinct_matches:\n",
"                        # The upper bound is reached: stop searching.\n",
"                        return _as_sorted_tuples(largest_distinct_matches_set)\n",
"                placed = attempt.placed_idx_set\n",
"                sorted_cand_list = []\n",
"                for e, m in attempt.cand_list:\n",
"                    if not m.isdisjoint(placed):\n",
"                        continue\n",
"                    # Number of bonds between this candidate and placed atoms.\n",
"                    contacts = sum(adj_mat[x][y] for y in placed for x in m)\n",
"                    if contacts:\n",
"                        sorted_cand_list.append((contacts, m, e))\n",
"                sorted_cand_list.sort(reverse=True)\n",
"                if not sorted_cand_list:\n",
"                    break\n",
"                d, cand, e = sorted_cand_list.pop(0)\n",
"                # Every candidate tied with the winner opens its own branch,\n",
"                # copied from the state before the winner is placed.\n",
"                for j, (alt_d, _, _) in enumerate(sorted_cand_list):\n",
"                    if alt_d < d:\n",
"                        break\n",
"                    attempts.append(Attempt(\n",
"                        [(alt_e, m) for _, m, alt_e in sorted_cand_list[j:]],\n",
"                        set(attempt.distinct_matches_set)))\n",
"                attempt.distinct_matches_set.add(cand)\n",
"                attempt.placed_idx_set.update(cand)\n",
"                attempt.cand_list.remove((e, cand))\n",
"    return _as_sorted_tuples(largest_distinct_matches_set)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f98e72d7080>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"borneol"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.12 ms, sys: 7 µs, total: 2.13 ms\n",
"Wall time: 1.94 ms\n"
]
},
{
"data": {
"text/plain": [
"((1, 2, 5, 6, 7), (3, 4, 8, 9, 10))"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"get_distinct_matches(borneol, pat)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAATwUlEQVR4nO3dWUxU1x8H8OOfxWFTGQEVBBcWFwRFcMe6gRrlwdRCm7QkPlRRHzS1aScmVqwPzdTEBlJbHY2pNlVx1FTRauvUFVwQca0oIuCG4IYbi4rM9/9QmDIwwACzIOf7eTLcK79zlLnfe84959IFAAQREZGk/mfvBhAREdkTg5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkKRw5swZUVJSYu9mEFEHxCAkKSxevFgcOnTI3s0gog6IQSipV69eifLycns3w2ZcXFxEZWWlvZtBRB0Qg1BSc+bMEevXr7d3M2zG1dVVVFVV2bsZRNQBMQglJVswuLi4SNVfIjIfg7BWUlKSWL16tb2bYTOyBYNs/SUi8zEIa1VVVYlnz57Zuxk2I1swuLq68hkhEZnEIKwlWzDItnhEtv9fIjIfg7CWbCMGPiMkIvoXg7CWbBdKGfsr040OkTXo9Xp7N8EqGIS1ZAwG9peIzHX48GExdOhQcfnyZXs3xeIYhLVkGzHI1l/ZpoKp87p69ar44IMPxOjRo8X3339v9c9xWVmZSEpKEnFxcWLGjBkiKCjIqvXsgUFYS7YLpWwjJNmCn6yvuLhYfPLJJyIuLk5s2rRJvHv3zqr1qqqqxKpVq0RUVJTo1auXmDdvntiwYYMIDg4WGo1GVFdXW7QeAPHrr7+KQYMGiWvXromLFy+K1NRU4ebmZtE6HQGDsJZswcDgp87i1atX4ssvvxTz588Xu3fvFgCsWk+v14t169aJoUOHisePH4uRI0eKFStWiNDQUKHVaq1S/9ixYyIiIkL89ttvYv/+/WLXrl1i8eLF4ubNmyI5OVl8++23Ijg4WGzcuFHU1NS0u15+fr6IjY0VS5cuFStWrBAnT54UoaGhFuhJBwUCAOzYsQPBwcH2bobNpKWlSdXfnTt3IigoyN7N6NSqq6uxdu1arFixAkeOHLFJzb1798Lf3x+hoaFISkqCh4cHoqKioNPprFLv8uXLGDt2LDw9PaHRaKDX6wEA5eXlUKvV6N69O4YNGwatVmuReqWlpUhMTISTkxNUKhWqqqoMx168eGH4c139Hj16IDQ0tM313759C7Vaja5duyIuLg73799vdx/eBwzCWvv27UPfvn3t3Qyb2bdvH/z8/OzdDJtJT0+Xqr87d+5Eamoqzp8/b5N62dnZiIiIQO/evZGQkABnZ2fExsYiOzvbKvUePHiAxMREKBQKJCcn4/Xr1wCAJ0+eQKVSQaFQYMKECTh58qRF6lVWViI5ORnOzs6Ij4/Ho0ePTJ5XV9/FxQXjx4/HiRMn2lRPr9dj69at6NmzJ6Kjo/HPP/8YHa+oqIC3tzfmzZuH27dvG77+9OlTQ/1x48bh+PHjZtc8ceIEhgwZggEDBuDQoUNtavf7ikFY6/Dhw+jZs6e9m2Ezhw8fhlKptHczbEan08HT09PmdTMzM7F9+3bk5+fbpF5RURFmzZoFNzc3zJw5Ew4ODkhISEBeXp5V6lVUVEClUsHR0RGJiYl48uQJAODu3btYsGABHBwcEBMTgytXrlikXk1NDTQaDbp164ZJkybh+v
XrJs+rq+/o6IiYmBhcvny5zTX/+OMP9O/fH0FBQWaPNO/du2dU/9KlS2bXu3nzJqZOnYoePXogJSUFNTU1Js87d+4cpk2bBmdnZyxYsAAlJSWGY/fv3zeqf/HixSbrlZWVYcGCBXBycsKSJUvw6tUrs9vaWTAIa2VkZMDFxcXezbCZzMxMqfqblpYGZ2dnPHjwwCb16i4ujo6OiI6OhpOTExYuXIji4mKr1KuurkZKSgrc3d0xa9YsFBUVAQCuXbuG+Ph4ODg4ID4+3vB1S0hPT0dAQABCQkJw9OhRk+fk5uYa1S8sLGxzvUuXLmHMmDFQKpVG05LAvwEZHR0NlUplNGV4/fp1o/oFBQVm1yspKWlyWtJcN27cMKp/69atJs+tqqpCcnIyunbtivj4eDx8+NCsGhkZGYiOjoabmxtUKhWeP39uVD8xMRGOjo4m62u1Wnh7e2PkyJE2mz3oiBiEtc6fP48uXboYfbg6s5ycHLv09/Hjx8jKyjL6sFrTmzdvsHr1aigUCkRGRsLV1RXLly9HWVmZ1WrWTcMOHz4cWVlZAIAzZ85g8uTJcHV1xZIlS8y+yJkjJycHUVFR6N27N7Zu3WrynLNnz2LKlCmG0UN76hcXF+Ojjz6Ci4sLkpOT8ebNmxb/ztmzZzF16lRD/dLSUrPrVVRUGKYlExMT8fjx40bn6PV6aLVahISEwMvLC2q12ii4srKymhw9mfpeddOSEydOxLVr18xua1POnTuHuLg4ODk5max//PhxDB48GAMHDsSff/7Zpho6nQ7h4eHo2bMn1Go1KisrDceuXr2K+Ph4Q/3Tp08jJiYG3bt3R0pKCt69e9eu/r3vGIS1cnNzIYQw+uHpzC5cuAAhhFUDoT69Xo8tW7bAy8sL4eHhUCqVWLNmjVX/vTMyMhAaGgpfX1/s3r0bwL8XixEjRsDT0xPJycl4+fKlxeoVFhZi5syZcHV1hVqtRnV1daNzdDodIiIi4OHhAZVK1a765eXlRtOST58+NTr+1VdfYdu2bUZTazqdDiNHjoS7u3uj0VNL6k9LTp48GTdu3Gh1m3U6HSIjI82uf+DAgVZNS1ZXV0Oj0cDPzw8BAQHQaDRGF3mdToeoqCiToycAyMvLw5QpU1qclmyrzMxMTJw40VC/sLDQaFqyvLy8Xd+/pqYGWq0WAwYMQN++faHRaIx+Do8ePYqoqCgEBARg7ty5VpuheN8wCGsVFRVBCNHoYmJtNTU1KC0ttekdmU6nQ2BgIAIDA9G3b19s2rTJ5EXbUm7duoXY2FjD3efbt2+h1WoRGBgIPz8/pKSkmDWqMNezZ8+wZMkSODo6YsGCBY3Cpu5iERQUBG9vb6jVasNii7aoPy05e/Zso8ULptSNXtpTPz09Hf7+/ggLC8Pp06dNnvPdd9+hR48eGD58OA4cONCofnBwsMnRkymXLl3C6NGjTU5LtlZLozfgv2lJZ2dnqFSqVv/7vHnzBhqNBj4+PhgyZAi0Wq1Rm3U6HcLCwgyjp7KyMqPFMJYcsTek1+uxZ88eDBkyBGFhYRg/fjyuXr1q0Rp1/e/VqxcGDx5s1P+7d+9CCGHVPr5vGIS1SktLIYTAvXv3bFbz6tWrGD9+PMLCwjBkyBDs2bPHqlOVT58+Nbr7LCsrg0ajQe/evTFo0KBGF4v2qluKrVAoEBcXh7t37zY6Xle/f//+0Gg07b4D12q18PHxwYgRI3Du3DmjY9u2bcOZM2ca1e/Tpw/69evXaPRgjpycHERGRqJPnz5NTks2pa6+r6+v2fWLi4sxd+5cs6cl668iHDt2LI4dO2ayvqnRE/DfYhgnJ6cmpyXbqrq6Glu3bkX//v3h7+9vGL1s3boVSqUSH3zwAXJzc9tV49WrV1Cr1ejWrRvGjBljtK2juroamzZtQt++fQ03htbadmHKu3fvUFpaavFRZ30vXrzAypUr4e7ujp9//hnAvytbhRAt3rDJhEGI/y4uAQEBiIqKavLBv6U0XI
pdWFho+LCOGjUKf//9t0Xr1T3z8PLyMnn3WX8PVFhYmEX2QJ08eRJDhw6Fn58f9uzZ0+y59fdAtXUPVkFBAWbMmGGYljQVKF988QUcHR0xZ84co+XobdmD9fz5c6NRZ2umGBuqqKiAWq2Gp6cnhg4darJ+3bSkh4cHpkyZ0upVoI8ePYJKpULXrl0RExODCxcuGNVPSUmBt7e3ob5er8eBAwfQr18/BAcHW/xnsr7KykqsWbMGSqUS4eHh8PLywpYtWyx6U/b48WOj/tdfGPL06VMIIZpdWfm+e/jwoWE1aEVFBYQQTa64lZHUQVgXEEqlEhMnTsSpU6cMe5BiYmKssgfq2LFjGDRoEAYOHIi//vrL6Fjdh7VuD1RGRka76+Xn5xs9FK9/95mdnW00SrPEHqy6aUlnZ2csWbKkVc/A6o9ezN2DVTct6ebmhri4ONy5c6fZ85tbxWfuHqy6acnw8HCjEWZ71U3PeXh4GI3eLl68iFGjRqFXr16tHnU2VFRUZFjNGh8fb7Sto6ysDMuXL4erqysiIyOhUCiwevVqi05bN+f58+fIysqy6KizoYKCAnz66adwcHBAamoqgH9vMrp06WJ0c9CZ6fV6dOnSBTk5OfZuSochbRBeuXIF48aNa/SGCAC4c+eO4WIRFxdnkT1Q9d8Q0dJD8YZ7sNqyB6rhGyJMTfnOnz8fCoUCy5YtM7r4NNyDZe4eqLppyYiIiEbTkq3RcA9WU3fqp06dwrBhw9o0LdncKr6m9mDdv38fH374YatWS7ZFcXExFi5cCCcnJ0RHRxtGnZZc2NRwFWH9bSUPHjyAk5MTdu7cabF6Hc3ly5eNtnIoFAqcOnXKji2yLYVCgczMTHs3o8OQLgibe0NESUmJUUBZYg9U/WnJCRMmNHpDRHPaWv/kyZOGN0QcPHiw2XObW8Vn7h6sgoICTJ8+HW5ubk1OS7ZFU6M3S05LNtyD9ezZM8OxvLw8JCQkwMHBATNnzoSbm5vRHj1ry8/Px/bt2616wTp+/DjGjRsHNzc3wzMkAOjRo4dVp0M7Gk9PT5s+H7Q3pVIpVX9bIlUQHj16FCEhIQgMDMThw4cbHV+0aBH69OmDn376CW/fvjV8va17sG7evIlp06a1eyl2VlaWWXuw2vqGiJZW8TX1Bou3b9+2alqyrbKzsxEbGwtnZ2ckJCSgd+/eiIiIsOjUdd0eLKVS2WgP1vnz55GamtqpR0j79u0zejbu6+uL/fv327FFtuXr64v09HR7N8Nm/Pz8pOpvS6QIwuZeXFtfS6v4zN2DVf8NEZZ8cW3D0Vv9PVD13xDR1oAwtYrP1B4sDw8PJCUlITQ0FP7+/ti7d2+7+2aOI0eOYMWKFVi7dq1Vtnu8e/cOmzdvRkBAAPz9/bFt2zaL13hfBAYGWuzF0e+DoKCgTn2j01BQUBDS0tLs3YwOo1MHYUsvrm1Kc6v4WtqDdeLECQwePNhqL67V6/XYuXMnQkJC4OPjg2+++QZTp05Ft27d8OOPP1pkKXZze7D0ej127dqFzz//HMuWLbPohvSOoqqqCj/88APWrVtn76bYzbBhw7BlyxZ7N8NmwsLCpOpveHg4fvnlF3s3o8PotEFo7otrm1NWVgaVSgVXV1eTe7A2bNgAX19f9O/fH+vXr8f8+fNt9uLa6upqbNy4EbNnz8bHH39slV+XUn8P1ujRo232q3XI/kaNGoX169fbuxk2M3r0aKNnpJ3dmDFjpOpvSzrdL+Z9/fq1WLVqlQgLCxM9e/YUeXl5YunSpeJ//2t9Vz09PYVarRa3b98WkyZNEjNnzhSxsbHiwoULwsnJSSQlJYn8/HyxaNEisXnzZpGXlydycnJEamqqcHd3t0Lv/uPo6Cjmz58vDhw4INLS0oSfn5/Fa7i7uwuVSiUKCgrElClTxI4dOyxegzom/uLmzs3FxUVUVlbauxkdRqcLwry8PJGWlibS09OFVqsVPj4+7f6e3t7eQq1Wi7y8PDFw4E
AxZswYkZCQIPLz84Wrq6v4+uuvRXZ2tjhx4oQICwuzQC86Fi8vL6FWq8WmTZvs3RSyEdkulLIFv2z9bUmnC8Lhw4eL3NxcMX36dIt/7379+gmNRiPOnTsnysvLxWeffWbxGkQdgYwjJPZXXo72boA1tGUatDUiIiLEwYMHxcuXL61ah8heZBsxyBYMsvW3JZ1uRGhL3bp1s3cTiKxCtguljFPBMvW3JQxCImpEtiDkCFhuDEIiakS2EYNswaBUKoVCobB3MzqMTvmMkIjaR7ZgkK2/K1eutHcTOhSOCImoEdmCQbZnhGSMQUhEjcgWDIsXLxa///67vZtBdsKpUSJqRLbFI9Z+ExR1bF0AwN6NIKKOpaSkRNy+fVuMGzfO3k0hsjoGIRERSY3PCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIagxCIiKSGoOQiIikxiAkIiKpMQiJiEhqDEIiIpIag5CIiKTGICQiIqkxCImISGoMQiIikhqDkIiIpMYgJCIiqTEIiYhIav8Hk9LtwWsWMZwAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f98e72d7350>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"squalene"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.56 ms, sys: 0 ns, total: 2.56 ms\n",
"Wall time: 2.39 ms\n"
]
},
{
"data": {
"text/plain": [
"((0, 1, 2, 3, 29),\n",
" (4, 5, 6, 7, 28),\n",
" (8, 9, 10, 11, 27),\n",
" (12, 13, 14, 15, 26),\n",
" (16, 17, 18, 19, 25),\n",
" (20, 21, 22, 23, 24))"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"get_distinct_matches(squalene, pat)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f98e72d74e0>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sesquiterpene"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 4.06 ms, sys: 0 ns, total: 4.06 ms\n",
"Wall time: 3.88 ms\n"
]
},
{
"data": {
"text/plain": [
"((4, 5, 9, 10, 11),\n",
" (6, 7, 8, 12, 25),\n",
" (13, 14, 15, 16, 24),\n",
" (17, 18, 19, 20, 23))"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"get_distinct_matches(sesquiterpene, pat)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f98e72d7710>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"triterpene"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 230 ms, sys: 0 ns, total: 230 ms\n",
"Wall time: 230 ms\n"
]
},
{
"data": {
"text/plain": [
"((1, 2, 3, 4, 10),\n",
" (5, 6, 29, 30, 31),\n",
" (7, 8, 9, 11, 20),\n",
" (12, 13, 14, 18, 19),\n",
" (15, 16, 17, 21, 22),\n",
" (23, 24, 25, 26, 27))"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"get_distinct_matches(triterpene, pat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment