Skip to content

Instantly share code, notes, and snippets.

@john-bradshaw
Last active April 28, 2022 00:26
Show Gist options
  • Save john-bradshaw/40608a0361ea8073faee8fd850a260e2 to your computer and use it in GitHub Desktop.
Save john-bradshaw/40608a0361ea8073faee8fd850a260e2 to your computer and use it in GitHub Desktop.
Notebook showing how different SMILES can sometimes have the same fingerprint
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
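"# But I thought they were different!\n",
"\n",
"This notebook shows two pairs of visibly different molecules whose Morgan bit-vector fingerprints (radius 2, 2048 bits) are nevertheless identical: the bit vector only records *which* local atom environments occur, not how many times each one occurs."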
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"RDKit WARNING: [20:26:02] Enabling RDKit 2019.09.3 jupyter extensions\n"
]
}
],
"source": [
"import functools\n",
"import warnings\n",
"\n",
"from rdkit import Chem\n",
"from rdkit.Chem import AllChem\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"one = \"CCCCCCCC\"\n",
"two = \"CCCCCCCCCCCCCCCCCCCCCCCC\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAANpUlEQVR4nO3dT1RU9f/H8Qsh/oFITFCBpPyDhKWGqCBIKtqxctOhWdUcW82qM5s6znLOaTV1WsyuM7tY5GJq4Zn+nGMTJiYaCoWEIAgEGIb/wAQ1IJjv4vM785szEAHvmbl/5vlYuRjuvIUPr7ncz/t9b0ooFNIAAEuVqncBAGBuxCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKASJreBSTI9PT0559/vnz58o6OjpMnT65evVrvigBYREooFNK7hrj74YcfPvjgg7a2tszMzPHx8fXr13/66afvvPOO3nUBsAKL/1Hf1NR08ODBo0ePtrW1FRYWnjx5sqamZnh4+N133z148GBHR4feBQIwv5BFXb9+3WazpaSkaJr27LPPejyev//+OxQKzczM1NXV5ebmapq2bNkyp9M5Pj6ud7GmNzMzc/r06c7Ozr6+Pr1rARLNgjE6NDTkcDjS0tI0TcvIyHC5XA8ePIh6zcjIiNPpTE1N1TRt06ZN3333nS6lWkNjY2NlZaWmaUVFRStXrnS73eoTC0gSlorRsbExj8fz9NNPqzNNh8Nx69ateV7f3NxcVlamzsqPHz8+ODiYsFKtobW19dixY+obmJeXV1VVpf5dUlJy7tw5vasDEsQiMTo5Oenz+datWxfOxO7u7oV84dTUlNfrVcmbkZHh8Xj++eefeFdrAYODgw6H46mnntI0LTMz0+VyPXz4MBQKNTQ0lJSUqJ+CzWa7ffu23pVaxO3bt0dGRvSuAnMzfYzOzMz4/f7NmzerX92KioqffvppsQcZGhqy2+3qCDt37rx48WI8SrWG+/fvu1yuFStWaJqWnp7ucDiisnJyctLr9WZkZGiatnr1aq/XyyeTxOXLlw8dOrR58+bc3Fy73X737l29K0I0c8doMBgsLS1V8ffiiy/6/X7J0b755psXXnhB07SUlBS73X7v3r1Y1WkNjx498ng8quU2JSXFZrP19vb+24t7e3tff/119aPZvXv35cuXE1mqNXR1ddXW1qpt0qysrGXLlmmalpubW1dXNzMzo3d1+H9mjdErV67U1NSo39KCggKfzxeTU57Hjx+73e7ly5drmrZu3TrWqzI9Pe33+59//nn1DT9y5Mgvv/yykC8MBAIbN27UNC0tLc3pdP7111/xLtUa7t6963K50tPTNU1btWqVy+UaHR3t7u4+evSo+hEcOHCgvb1d7zLxf8wXowMDAw6HQ22yZ2dnezyex48fx/Yturu7jxw5otZrdXX1tWvXYnt8cwkGgzt27FDfjbKysvr6+kV9+fj4uMvlUo0TeXl5dXV1carTGsbHxz0eT1ZWlqZpqampdrt9aGgo8gV+v1/tAah2vbGxMb1KtYaOjg6bzXbq1CnJQcwUo/fu3XO5XOpUMT093el0xvWiu9/vT/L20p9//vnVV19VAVpYWOjz+aanp5d2qNbW1oqKCnWow4cPX79+PbalWoDaJl2/fn34lL+trW3OV46OjjqdTrW/V1BQ8NVXXyW4VGsYGBg4ceKEOiHbtWuX5FDmiFH1Ef3MM8+oj2ibzZaYNm+1XpOwvbSzszM8vLB27drw8IKEGnxYu3atpmm0l0ZS26RbtmxRAVpeXt7Q0PCfX9XS0rJnz55wa8rAwEACSrWGkZERl8u1cuXKcG
fk8PCw5IBGj9GpqSmfz7dhw4bwR3Rra2uCa4hqL7X2ev3jjz+ihhdie0Hz/v37DodDBfTWrVu///77GB7cjILB4O7du9XqKi4uXtQ26fT0tM/nU1cAVq1a5Xa7JyYm4leqBUxMTHi93uzs7PA2aU9Pj/ywho7RQCBQVFSkVtjevXt//PFHvSqZ3V46NTWlVzFxstjhBQnaS0OhUHt7u81mU9+E/Px8n8+3tEV169atcLvejh07GhsbY16qBahtUtWKo07IWlpaYnVwg8boxYsXwyMx27Zt8/v9Rtgxt2p76cTEhM/nUxeCtcUML0io9tLMzMwkbC9V26Th4QW32y3fJq2vr9+2bVu4XY/20kjBYHDnzp1qeb/00ktff/11bI9vuBhVG2fqP5yTk+P1eo123hfVXmrq9aquym3atEl9w/fv37+E4QWJ3t7eN954Q717aWmp5dtL/3N4QSKyXW/NmjU+n88IJx/6Uvd4Uwts48aNkm3SeRgoRm/evDnnfKEBWWO9BoPBV155Ra2wkpIS4fCCRDK0l0YOL6ht0nmGFyRu3Ljx2muvqR9rMreXzr7H25MnT+L0XoaI0ZhvnCWGedtLr1y5cvjwYVV5DIcXJB49euR2u9WgzoYNG6zUXjo9PV1XV5eXlxe+Kvfrr7/G+02Tub30zp07TqdTbZOq4YXZ93iLLZ1jNE4bZ4lkrvbS/v7+8PDCmjVr4voRvQTWay8NBoMvv/yy+h/t2bPn7NmzCXvrJGwvjdwmTUtLi+s2aSTdYjSuG2cJFtVe+u233+pd0RxmDy+Mjo7qXdQcLNNeeunSperqarW8CwsL9Rosbmlp2bt3b3jzsL+/P/E1JEDUPd7mGV6IB31iNN4bZ7owbHvp7OGF33//Xe+i/oOp20vV8IJaCbEaXpCwdntp1D3eysvLz58/n+AaEh2jidk404tqL1Xr1QjtpeojOnJ44erVqzrWs1gNDQ3bt29XxdtsNuNfMY/38IKEJdtLY3uPtyVLXIwmcuNMXwZpLw0EAlu3blVl7Nu3z6S3ozdLe+nY2Jjb7Y7cJv3zzz/1LmoOZ8+eLS4u1szfrtfe3n78+HGDbJMmIkYTv3FmBDq2lzY2NhpweEGir6/PsO2lkcMLapv0xo0behc1H7O360UOL8TpHm+LFd8Y1WvjzCASv16vXbtm8OEFiXB7aWpqqsPh0P3vZbVNGh5eqKysvHDhgr4lLZwZ20vVNmnk8MKdO3f0LioUil+M6rtxZiiRd9utrq6O03o10fCChGovVfcz1re9NBgM7tq1S/1Y9R1ekAgEAvn5+Zrh20vV8ELi7/G2QLGPUSNsnBlQVHtpDNdr5HyhiYYXJK5evbp//361wBLfXqoejqTe/bnnnjPC8IJEZHtpfn6+0dpLZ9/jLQHDC4sV4xg1yMaZMUWu15i0l6rhhciHI5lueGHJdGkv7e/vt9vtapvUgMMLEsZsL9VxeGFRYhajhto4M7Lm5mb53XZnPxzJvMMLEpHtpVu2bDlz5kyc3kg9HEld5g4/HClO76UXQ7WXXrp06cCBA2p5FxUVGXybNAYxasCNM4OLbC9V63VycnLhX27J4QWJ8+fPx6+9NOrhSDabzSBnanGie3tp5D3e1PCC8YcFRDFq2I0zU4hcrwtsL21qago/HMl6wwsS8Wgvnf1wJHMNL0hEtZcm5vc6cpvUaMML8xPFqNqpTE1NPXHihHFmH81lgXfbnT28YNJJ87i6efNmbW2tSr3S0tKmpqalHSfq4Uj79u1byMORLObJkyeR7Xperzd+n9mz7/FmzOGFfyOK0VOnTr355ptJ28kUK/O3lybn8IJEIBAoLCzUltpeeuHChcrKShWg6uFIRr4qF29R7aW//fZbbI+vhhdycnI0kwwvzMkQ9xtFaK720iQfXpBYWntp5PCC5OFI1hMIBAoKCmLbrhd1j7eamprm5mb5YXVBjBqIauIJt5eqbY2UlJTa2tquri69qzOfyPbSQ4
cOdXZ2/tsrBwcHk2F4QSKqvfTLL7+UHC1yeGH79u1m74wkRg1HrdecnJyioqKKigqGFyQi20tXrFgxu700rg9Hsh55e6nFhhcUYtSgRkZG+H2OlTnbSyMfjqSuysXp4UgWs+T20q6urvA2qcWGF4hRJItz586VlJSo86Cqqqrww5GOHTvW2tqqd3UmE9VeOv89WSw/vECMIomE20tVM1NZWVl9fb3eRZnYf7aXRg0v2O32oaEhXUqNK2IUSaevr6+zs/P06dPJ3MkUK6q9VF1czs7ODreXzh5esHBnZEooFNIAQKCnp+f9998/c+aMpmlVVVVvvfXWZ5991tPTo2laeXn5xx9/HH66nyURowBi44svvvjwww+Hh4ezsrIePnxYXFz80Ucfvf3222pbycKIUQAx8+DBg08++aSkpGRiYuK9995TfaaWR4wCgEiq3gUAgLkRowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAi/wNNu2KQNxu5gAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fcd218d3bc0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.MolFromSmiles(one)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAM80lEQVR4nO3cW0wUdxvH8f8iunKwLVVE5FAq1Ug0tchBqz1IYtKWYEwb5Ub3wovijSGxN2hi3AsT5ZJbtMZwQUy4pOnBQDlVERHsQaukeCxaay1VWZBa3J334p9O5t3dWZZ5ZnzzJt/PFeoyz+4cfvM8M7P6DMNQAACnUv7XbwAA/r8RowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKACDEKACLEKACIEKMAIEKMAoAIMQoAIsQoAIgQowAgQowCgAgxCgAixCgAiBCjACBCjAKASOqLLBYOh9va2oqLixctWlRSUuJprSdPnty+fXtycnLTpk0+n8/TWrdu3XrppZeUUosXL/a0kFLq0aNHSqmsrCyvC0UiEcMwIpHI/Pnzva4F/H8zXpSurq4333xTKZWfn5+amlpXV/fw4UMvCoXD4ZaWlpycnLy8PJ/PV15e3t/f70UhwzAmJyeDweDChQu3bNmSlZXV2Nj47Nkzj2r9+uuvgUAgPz8/Nze3pqbm9u3bHhUyDOP8+fMbNmz49NNPV65c2dbW5l2hSCTS0tJSXl7+ySef1NfXT0xMeFcrFAodPHjw2LFj27Ztu3z5sneFDMO4c+fO3r17T5w4cfLkyXA47Gmtc+fOtba2Hj169MGDB54WikQira2tQ0ND3d3dnhYyDGNqaqqvr6+3t3d6etrrWmNjY48fP5Ys4UXE6Ojo6Pbt23VqFxUVVVdXz5s3TymVnZ19/Phxd3eyrq6udevW6Vpr165dunSpUiolJWXPnj337993sVA4HD5+/HhOTo5e/ooVK8yinZ2dLhYyDGNiYuLAgQN+v18plZaWpn/IyMg4cuSI6zvZzZs3d+zYoT/Lyy+/rH+orq4eGRlxt5BhGH19fWVlZbqEnhjy8/NbW1t1I+yicDh86tSp3NxcpdTChQuVUqmpqfv27RsfH3e3kGEYk5OThw4dSktL0xtLKeXdifzu3buBQMDn8+kPlZGREQwG//77by9qXbx4cfPmzUqp119/XSlVU1Nz8+ZNLwpFIpG2trbXXnstPT3d7/cXFBS0tLR4UcgwjKdPnzY2Ni5atGj//v2S5Xgbo7pZMw/7YDCoD/urV69+8MEH+vgpLS3t6+uT19LNml6mXvWRSMTsFt3dyXp6et566y1da8OGDefPnzcMo6Ojw7xSUVNTc+PGDXkh3awtW7ZMB83OnTvv3LljHjw6d9zayazrKj09PRgMTkxMNDc3L1myRCk1f/78+vp64UnbNDY2Zn6EvLy8lpaWwcHBTZs26bVXUVGhV6krLly48Pbbb5tLPnPmTENDw4IFC5RSWVlZTU1NMzMzrhQyj39zY33++edFRUXmH10cIKamphobGzMzM3VY19XVffzxx/ozrly58osvvnCrkGEYY2Nju3btMjdWbW1tRkaGrnv48OGpqSkXaw0MDGzcuFF/kDVr1qxatUr/vHXr1itXrrhYKBKJnD59urCwUG+d3bt3S5bmVYyak7Vu1gKBQGwz2N7ers9sOndu3brlrFZsVkb1aNevX9+5c6cu9MYbb0imVB3W1giztk7//PNPU1OTvk66YMEC4ZRq3aUqKyujOpqenh6z796yZcuPP/7ouJA+/gsKCqxhbf7r+Ph4fX29HiAWL17c1N
T0/Plzx7WmpqaCwaBu09LT0xsaGkKhkPk2rOeMQCDw+++/Oy5kxAtrc2ONjIx89NFHeu2tXr36m2++kRQyDMN6GigvL//uu+/031vzTp+chAOENayjDpzOzs41a9aYufPzzz8LP5TZrOnQbGho0PvzvXv36urqUlJSYlesY9Zl5ubmNjc3P3/+XMdIdna2HiDcuhI4NDT07rvv6hW1fv363t5e4QI9idHu7u7YZi0uvZ2sO9nTp0+TL6TXclSzZvfib7/9du3atY5PbrHNmt1b/e2338wdYvny5Q52sgTHv5X++OaFi0Ag8Mcff8ypkBHTrJ07dy7uy77//vv33nvP3PPMmEiePv7N879dd2adYDIzM50NEFHhZQ1rq/b2dvNqjOMp9e7du9bN3dzcHHudyrpBJVOqOVkrpcrKymK3gj6R66sxeoB48uSJs1rt7e26lbZbOYODg+aeU1lZOTAw4KyQNax18xH1nv/666/6+vrU1FSl1Kuvvio5ketjU/cES5YsEfYEJpdjNHGzZsfZlJq4WYtrZmbGwZQad7Ke9bcuXrzoYEpN0KzZefTokbMpNcmwtjIPrblOqVHN2tmzZxO/fnR01Bwg5jSlxk7WiaecZ8+eOR4g7Jo1O93d3eYAUVVV9dNPPyVZyPjvZs0urE1//vmnOUA4CIuhoaF33nknmWbNemjoE/lcB4iokTTBpbBr1659+OGH+pUlJSVnzpyZU6HYDe34BBPLtRiNbdbmOrwMDAxUVlbq1ZR4SrUe/8mHtWlOU2qSzVpcUcNy4p0syWbNTtSU+vXXXyd4cZLNWjK/O+uGtp4jZz3+o8x1Sp21WbPjYEqdtVmLy8GUOmuzZufSpUvm6FpWVjbrqcuwmaxn/S3rAPHKK68k+bzK8PCw+fZKS0uTnKyjBogkrwRG/ZYr9y2sXIhRZ81aXLNOqbpZM8N6Tsd/lFmnVAfNWlyx99lip9S5Nmt22tvbi4uLExzbc23W7CQzpUbdBpm1WYsrdkqNO0DMqVmzk+SUGtWsObhBmvyU6iyso5Zg3dx2x6a8Wfvll19qamr0W121atWXX35p90rhZK3fqnUISBACly5dMg/zkpIS+UXwuKQxeuHCBXOyrqiocOXBjrhTqrBZsxO1m+plOpisZ2U3pUqatbgS3Oayuw3imPU2l3VK1RtLePxbWafUqAEitlmT3NZLPKU6a9bsRE2pUUe4i7dBrM1H3HuwyU/Ws+ro6LAOEFevXrX+qzWshZdu7927l7jLefjwoYu3RhMTxeju3bv1xygsLDx9+rS7j/tduXJl69at5snN3DYbN250fDE7rqmpqcOHD+vQzMjIqK2tzcvL02G9a9eusbExF2tZp9SqqqrPPvtM2KzZiZpSm5qa5M1aXLFTakdHh7BZs2OdUtevX3/27Fl5sxZX7JQaCoXMDsjdK2uxt7k8ug1ifSKwuLhYP68yPDxsNmulpaU9PT3yQna3uaI+6fXr1+W1BgcHzTbu/fff/+GHH4z/7iTcfVDPjihG9+/f71azZkdPqX6/PzMzMy8vz8XjP4o5peoJSDJZJ2bdydLT05XsYa/E+vv7y8vL1b/PgaelpR06dGhyctL1QuPj4/v27dNTqu56cnNzT5065frG0l+kyc/PV/8+sa+UKisrczGsTSMjI9XV1bqE+U2EHTt2uP7Y+fT09JEjR/TDmH6/X28sv99/4MAB17/W1dnZaT6vsmLFCn1mzcnJcf2LMPfv39+zZ49e/tKlS82i69at6+rqcrGQ/iKMPpHPmzevurraPLNu3759dHTUxVp2RDH6+PFjd5u1uKanp3t7e/v6+tx90Deu7u7uoaEhL75IE+XBgwdHjx5tbW2d0z0rB8Lh8MmTJ0+cOLF3717H16yTdPny5W3bth07duzgwYPenVmNf78pVFtbW1FR4fW3LfWUWldX51azZkdPqcuWLSssLHSrWYtLP6+SnZ1dVVXldbM2PDy8efNmn89XUFDg6W
QdCoX0ALF8+XKl1OrVq7/66isvCsXlMwxDAbA3MzOTkpLi8/l0b+WpF/Zfz4yPjyulJiYmzKuiHjEMo7+/PzMzs6ioyGzqPXLt2rVQKHTjxo3a2lp9VeTFIEYBQIT/bxQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUCEGAUAEWIUAESIUQAQIUYBQIQYBQARYhQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUCEGAUAEWIUAESIUQAQIUYBQIQYBQARYhQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUCEGAUAEWIUAESIUQAQIUYBQIQYBQARYhQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUCEGAUAEWIUAESIUQAQIUYBQIQYBQARYhQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUCEGAUAEWIUAESIUQAQIUYBQIQYBQARYhQARIhRABAhRgFAhBgFABFiFABEiFEAECFGAUDkP2pNeKQBJeJdAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fcd218d6530>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.MolFromSmiles(two)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# When True, get_fp warns if a fingerprint comes back with no bits set.\n",
"_CHECKS = True\n",
"\n",
"\n",
"@functools.lru_cache(maxsize=100_000)\n",
"def get_fp(smi_str, num_bits=2048, radius=2):\n",
"    \"\"\"Return the Morgan fingerprint of a SMILES string as a float32 NumPy vector.\n",
"\n",
"    Results are memoised with functools.lru_cache, so the very same array object\n",
"    is handed back on repeated calls. It is therefore marked read-only; take a\n",
"    ``.copy()`` before modifying it.\n",
"\n",
"    Args:\n",
"        smi_str: SMILES string of the molecule.\n",
"        num_bits: Length of the folded bit vector (passed to RDKit as ``nBits``).\n",
"        radius: Morgan radius passed to RDKit.\n",
"\n",
"    Returns:\n",
"        A read-only float32 array built from the RDKit bit vector.\n",
"\n",
"    Raises:\n",
"        ValueError: If RDKit cannot parse ``smi_str``.\n",
"    \"\"\"\n",
"    mol = Chem.MolFromSmiles(smi_str)\n",
"    if mol is None:\n",
"        # MolFromSmiles reports a parse failure by returning None, not by raising.\n",
"        raise ValueError(f\"Could not parse SMILES string {smi_str!r}.\")\n",
"    bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=num_bits)\n",
"    fp_ = np.array(bit_vect, dtype=np.float32)\n",
"    # The cache shares this array between callers; freeze it so an in-place edit\n",
"    # cannot silently corrupt later lookups.\n",
"    fp_.setflags(write=False)\n",
"    if _CHECKS and fp_.sum() == 0:\n",
"        warnings.warn(f\"All zeros fingerprint obtained for molecule {smi_str}.\\n\")\n",
"    return fp_"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fingerprint both chains, then check whether every single bit agrees.\n",
"one_fp, two_fp = get_fp(one), get_fp(two)\n",
"np.all(one_fp == two_fp)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"three = \"C1CCC2CCCCC2C1\"\n",
"four = \"C1CCC(C1)C1CCCC1\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAPF0lEQVR4nO3dS1BTVxgH8JMIKIKILx5q8G1EqwUVilprVWZAZcsyM7rJjJu4DJ3pTLpMd2kXnckMm6zosEyNdSZCfVQtAcVnAN/4ICAaFBIwxCRdnJk7GagUcu69556b/2/VofTer1f5c3LuOeczpFIpAgAAmTLyLgAAQGyIUQAAJohRAAAmiFEAACaI0eySSqXwUlFReMJZCDGaLaampn7++eeGhoaampqffvopFovxrkhv4vH4L7/8Ultb29DQ0NLSEolEeFcEakmB3iWTyba2NpPJRAgxGAz0z91sNvt8Pt6l6YfP5zObzfTZ0odsMpna2tqSySTv0kBxiFGdu3Xr1uHDh+mP9969e69ever3+7/66iv6lfr6+vv37/OuUWwDAwNNTU30eW7fvv2PP/7o6uqqq6ujX6mtrb1x4wbvGkFZiFHdevfunc1mW7RoESFk1apVLpfr8+fP9F9NT0+73e7Vq1cTQnJycqxW6+joKN9qRTQ2Nma32xcvXkwIKS4udjqdnz59ov8qmUx6PJ6ysjI6OLVYLKFQiG+1oBzEqA5NT0+7XK7ly5cTQnJzc20224cPH2Z/2/v376WcXblyZXrOwtwSiYTH4ykpKSGEGI1Gi8UyPDw8+9sikYjD4aA5W1BQ4HA4pJwFPUGM6o3f79+5c6f0mf3hw4dzf38wGGxoaKDfX1lZefHiRXXqFNdff/319ddf0yd25MiR3t7eub//8ePHzc3N9Pu3bt3a3t6uTp2gGsSofvT39588eTKzN0her3fz5s30v21qanr69KlydYrr1atXFouFvkFav369x+OZ/xukjo6O3bt30yd87Nixe/fuKVoqqAkxqgfhcNhut+fl5RFCVqxY4XQ6Y7HYQi8Si8VcLteyZcsIIXl5eTabbXx8XIlqRRSNRh0Ox5IlSwghS5cudTgck5OTC71IPB53u91r1qyRpqTfvn2rRLWgMsSo2GZP0o2MjLBc8M2bN1ar1Wg0EkLWrl3rdrsTiYRc1YoomUy2t7dv2LCBvixqbm5+8eIFywXplHROTg79nedyueLxuFzVAheIUYF1dnbu2bOHfk48evTo3bt35bpyd3f3wYMH6ZX3799//fp1ua4slp6enkOHDtHnsG/fvr///luuK/f19Z04cYJeeceOHRcuXJDryqA+xKiQXr58abFY6A+hyWTyeDyy34KOwioqKqRR2ODgoOx30ayhoSFpVF5eXq7QqNzr9W7ZskWakn7y5InstwAVIEYFQ9fQ0Ek6uoZmampKudvNnhNU9HZaQOeIi4qKpDnijx8/Knc7ujqN3o6uTlP0dqAExKgw6PBQ2tOp5vBQhcGvRvBasTB78Is1vAJBjIohEAhIk5U1NTVcJis7Ozul9ZLff//9nTt31K9BOX19fY2NjdJk5Z9//ql+DT09Pd9++600FXvt2jX1a4AMIEa1TlOvzmVfGKAF6a/O6W4ujq/OZV8YACpAjGrX5OSk0+nU4EJOupecLlOle8kzWKaqBXQhpwbPFqBT0vn5+XRK2m63T0xM8C4KvggxqlFer3fTpk3SJN2zZ894VzTTwMDAqVOnpJONzp8/z7uihbl06ZJ00tXx48c1uK2IZdMUqAkxqjm9vb3fffedKJvcZ2zhf/DgAe+K/t+jR48E2uR++fLlqqoqWu0333zzzz//8K4IZkKMaoigRy7N80ApLRD0yCU6JV1aWkrmPFAKeEGMagJNouLiYimJxsbGeBe1MHMcb6oFOkiiiYkJ6XdAYWGhKL8DsgFilD+/379r1y6xPhd/ye3bt6UZierq6itXrvCuKJVKpWYcR3/z5k3eFWUufUZi27ZtGp+RyBKIUZ5m95/gXZE8tPN+LP0tzbp163TzlgadYDQFMcrHHP0n9CF9tVZ+fr7dbld5tVY0Gn
U6nYWFhXpdM4ROMNqBGFXbPPtP6MPr16+5DAa9Xu/GjRul4fDz589VuCkXgr6W1BnEqKoW2n9CHwKBwIEDB6SdrIp2ypzdCVW5e2kHOsHwhRhVSZYvpVahU+bo6KiWlwqoAJ1geEGMKk6W/hP6oNApf7PPmtPswlWloRMMF4hRBeGYif8kb6dMv99fWVkpvbP+306o2UBTx9lkA8SoUpTrP6EPMzplZtABhaUTajZAJxjVIEblp07/CR3IuFOmLJ1Qs0GWd4JRDWJUTir3n9CHcDic3ilz7kxMT166XAw9iv9XFnaCURliVDZ4T8qiv79f6pT5pU/oHR0dCnVCzQbZ0wlGfYhRGWih/4Q+pHfKrK+vDwaD9Ovpb6UqKioQARnTdycYXhCjTDTVf0IfZqxeOnv2bEtLi2qdULOBLjvB8IUYzVwikWhtbZXWKobDYd4V6cfQ0NDp06eNRqPBYDAYDEaj8fTp00NDQ7zr0g86JZ2bm0sIaW1txVtQFoZUKkUgI8PDw+Xl5QUFBYFAQDoBHmTU3d1dW1tLCAkEAjU1NbzL0aFgMFhbWxuNRkOhEN1jBhlAjGaOxmhpaenw8DDvWnSLbp/F31LllJWVjYyMIEZZGHkXAAAgNsQoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCgAABPEKAAAE8QoAAATxCirSCQSDAZ5V6FP3d3dM/4B5BUMBiORCO8qhIcYzVxJSUlra2s0Gq2qqjp37tzY2BjvivQjFAqdOXOmrq7OYDAYDIa6urozZ86EQiHedenH2NjYuXPnqqqqotFoa2trSUkJ74pElgIG79+/t9lsOTk5hJCVK1e6XK54PM67KLFNT0+7XK6ioiJCSG5u7tmzZ1taWpYsWUIIKSgocDgcU1NTvGsUWyKR8Hg8NDeNRqPFYhkZGeFdlNgQozLo6+trbGykv5Z27Nhx4cIF3hWJyuv1btmyhT7JpqamJ0+e0K8PDg5aLBb6dZPJ5PF4+NYprs7Ozj179tAnefTo0Tt37vCuSA8Qo7Lxer2bN2+eHQEwH319fSdOnKBPz2w2/+evohkRcPfuXfXrFNfLly/xq0ghiFE5xWKx9A+kNpvt48ePvIvSunA4LE2MrFixYu6JEfqBdM2aNdIH0rdv36pZrYgikYjD4cDEiHIQo/IbGhqyWq1Go5EQUl5e7na7E4kE76K0KB6Pu91umok5OTlWq3WemRgOh+12e15eHk1ep9MZi8WUrlZEyWSyvb29oqKCEGIwGJqbmwcHB3kXpUOIUaX09PQcOnSIfobat2/ftWvXeFekLR0dHbt376bP59ixY/fu3VvoFfr7+0+ePCnNA/h8PiXqFFd3d/fBgwfp89m/f//169d5V6RbiFEF0bHAhg0bpLHAixcveBfF3+PHj5ubm+mP99atW9vb21mu5vf7d+7cSa9WX1//8OFDueoU15s3b6TPQ2vXrsXnIaUhRhUXjUYdDkd+fj4hZOnSpQ6HY3JykndRfNBJusWLF8s7SUfXSC1fvlyakv7w4QP7ZUVEZ+eXLVtGCMnLy7PZbOPj47yL0j/EqEpevXplsVgMBgMhZP369R6PJ5lM8i5KPclk0uPxlJWV0YG5xWIJhULy3u
Ldu3c2m23RokWEkFWrVrlcrs+fP8t7C43zer2bNm2S1oo8ffqUd0XZAjGqqsuXL1dVVdG/6EeOHOnt7eVdkRq6uroOHDhA/69rampu3Lih3L1u3bp1+PBheq+9e/devXpVuXtpRzAYbGhooP/XlZWVFy9e5F1RdkGMqo0u2SktLZWW7AwPD/MuSimvX7+WxuDr1q1TbQzu9Xo3btwojcueP3+uwk25oPvo6Bic7qPLtjG4FiBG+RgbG7Pb7XSWsLi42Ol0fvr0iXdRcpqcnHQ6nYWFhYSQ/Px8u92u8iRdNBqdUcDExISaBShtenra7XavXr2azghbrdbR0VHeRWUpxChPAwMDTU1NdNC0bds2xnfW2jFjMPjs2TNelfAaDivN7/fv2rVLWp9w//593hVlNcQofzN+JB48eMC7oszdvn1bmpqsrq6+cuUK74pSqVSqq6urrq6OVlVbW3vz5k3eFWXu0aNHuvzVKzTEqCbQJTvFxcXifkDT+ItyOiWt6FIBpaVPBBUWFjocDp1NBIkLMaohgr4uEGjZ5uyFq0IkUVa9lhQRYlRzxFq8IuImInm3USktOxfJiQUxqlHaX0rd399/6tQpWuH27dvPnz/Pu6KFuXTpkrSp//jx4xls6ldalm/ZEAhiVLs0u7Ev/YAlulpL0AOW6BFTdM3Qgo6YUho2EIsFMap1mjpmQpf9JzTVCQbH2YgIMSqGQCDA/dAzffef0EInGByuKCjEqDA4HsGbPf0neHWCwVHfQkOMCkblhhDpt6OTdLrvP6FyJ5gZnVDReEZEiFEhqTA8zPL+E+oMD7/UCRXEghgVmHKdMtF/glJuslILU7EgF8So2GR/da6phQFaIPurc00tDABZIEb1QJaFnJpdpqoFsizk1OwyVWCEGNWP9E6ZC91WpP1NU1rAsq0ofdNUZp1QQbMQo3qz0E3uYm3h14KFbnIXaws/ZAAxqkPzPHJJ0AOltGCeRy4JeqAULBRiVLfmOAAU/SdkMUcnGBU6oYJ2IEZ1Lr1TJj2OHv0n5DW7E4yeDtuH+UCM6l8ymWxrazOZTHRkRH+8zWazz+fjXZp++Hw+s9lMny19yCaTqa2tDUfbZQNDKpUikAWmpqZ+/fXXjo6O8fHxxsbGH374gX4aBbnE4/Hffvvt999/Lyoqqq6u/vHHH2lfUtA9xGh2oX/c0pgUZIcnnIUQowAATIy8CwAAEBtiFACACWIUAIAJYhQAgMm/V2k8oxNCPngAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fcd218e45d0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.MolFromSmiles(three)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7fcd218e40d0>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Chem.MolFromSmiles(four)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fingerprint both ring systems, then check whether every single bit agrees.\n",
"three_fp, four_fp = get_fp(three), get_fp(four)\n",
"np.all(three_fp == four_fp)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment