Skip to content

Instantly share code, notes, and snippets.

@sshojiro
Last active August 11, 2018 07:11
Show Gist options
  • Save sshojiro/a6ce6bc3deed586b03e018dc1b0773b7 to your computer and use it in GitHub Desktop.
Save sshojiro/a6ce6bc3deed586b03e018dc1b0773b7 to your computer and use it in GitHub Desktop.
Morgan fingerprint fragment retrieval fails
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Morgan Fingerprint Fragments"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from rdkit import Chem\n",
"from rdkit.Chem import AllChem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"from rdkit.Chem import PandasTools\n",
"from rdkit.Chem import Draw\n",
"# utilities\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"mol1 = Chem.MolFromSmiles('Oc1ccc(NC(=O)C)cc1')\n",
"mol2 = Chem.MolFromSmiles('O=C(Nc1ccccc1)C')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: unable to load font metrics from dir c:\\users\\shojiro_shibayama\\miniconda3\\envs\\cheminfo\\lib\\site-packages\\rdkit\\sping\\PIL\\pilfonts\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAFNklEQVR4nO3d23LiOhBAUXnq/P8vax6YQwgGc2nZltRrVR5SMEMcYjZCvi211gLAt/6cvQAAY5NRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUfayLMuyLGcvBezuv7MXgKncdrPWernl8g3MyipO1Dqd639gNWNi1m++dK3nO6uQkjIxKzcfeDnw3P6/VjamZM3mhUg61w9lfWM+VmseaJjO9SNb5ZiMdZofH013Rn6KtY6ZWKGz22ngud1KJWUm1uaM9vvMfvdThivpspS7hbresnEXydn9PpdLQI/pV611o5Xb955IHPmUg0HTObJcl1Z+d+9Zai39LRRdk1H2NWJJ4SMymshZH6KHK+mzAemy/PqCCxnlCHOUtNZfX3Aho5RjEjZcSeFNMspxxiqpbU28SUazeDYxevCE6XAlhZdklKP1XNJ1N6+3bNxFcjLKCboqqc3uBDmKiR7tfYzTbTcNKgnq8Wg8mutkYvTTn9528aSTnRiNcpqXQ874mFQ6OYCMcqadSrosSym1SCeHkFFO1qqkx5z9D9bMjc6vz4nRO1/Pkx5zxn7YYDRKFz4akxp40hUZpRdvlrR0k04neObC7vd05J0d7ztpaHHQPf+T0ckNMTF6q7fD6rcpKUVG6dCzkvaZfiVFRulRh7ncoKTJyWhGfQ7rhqakmcnozOTySEqalowyhiHeEroq6bIsA22sG5r9RqGlS0nPCr4DE04ho+kMMawbWq2HPsnSeToZnZZcnmjv004XJxPoiYwygBHfEvYoqYFnn2QU9tKkpNLZPxnNZcRh3dACp53+eYTWC0VjMjonuezH+yU13TkoGaV3E7wlbJTUwHMCMgq7uzTUaadnJaOJTDCsG1pvp52mFQeDTkguu3L756i1Xr93pOY0ZDSREdvqLYH+yehsjHGG4O1hJuZGZ7DeZOFVCoeR0c+tT+Bzd8vtePCko6oPOKabd/grZCCjra2TetJR1ROUdPTlJwkZbWodzSann1yWUsry7/E+eKgJSjolf5TJyGiX7jYT1VpK+e5lp6Qn8swnIaNfab41/FE3W1FS2JWMfmW9iek71/+4c+NGLOlwC/ymWX+vzGT0VAe+nEYsKQzB7vdNra8MeeLlzVaux3RzAG9aeRiNtnZX0s5eSMakh/EU5+EVlVE/JX12xqN+lvB7jz6IzPB7sWI0mtG5Y9LMZ9vM9dumIaNJHVzSdOnsaU6cvcloXnuXNF06yUpGU9v1WupfP+y0E4iGqJOS0excSx2CZJS+rqU+w56tRp3JyCilvF3SA9JpMMtwZJR/tkva/JKW6dJpiDovGeXHRklblS6+AQp6I6P8suu2+5Khnkad+cgo92y7h4/IKA90te1+BoaoU5NRHnu/pKY7f/Ek5COjPLVRUgNPuJr0qDvauZZUOl/o+Dyz7EpGea35TqMTupv9NBmaiYxC2MNoKmkarsUEECKjACEyChAiowAhMgphd1fVLrYv5WL3e2jhrqQamomMQiPSmZUP9QAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAIX8Brhk64uRf6moAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2db97940>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mol1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAE5ElEQVR4nO3d23KbQBBAQUj5/3+ZPGBjBAJhBthbd+XBiW/EFEcDK+F+GIYOgLP+pd4AgLLJKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAyFfqDSBLff/79jCk2w4ogIyy0vcv6Vz8FXjlpJ5X62gOw8twCrySUYAQGQUIkVGAEBkFCJFRXq0XlKzUwy5PeGJlUVINPabv+67rBj+u9sgo72jBYf3PQ84Y0L7vlbQ1djmcsajn4l0Oq6bY3/AHO/Vcf5iDqxEyCp8drOf6sxxfLbCbYdO5ei6+gkOsevYxLMXrufhqjrK62cHw7dp6Lr6yA61i9i6tu6SeH0OppBWza3nVzGuWLp89j5T0wm9HPjz9nrbcd+Y+DMN+ST0/v1YyygHlj6j31XNuLOn+d/lYW4ojo8yUn8u1h0+lj4ycSloZd3iifs8HaxpLT38ABZFRPqlxRH2AkrZDRqlZ2nNnJW2EjPLD1HmDgyUV06LJKNzrSEmNpUWTUXZVOqI+3KwjI6eSlktG4QlHRk4lLZSMwnOUtEoyStd1dZ68by3TW77nWjLKthrbmgPL95WRUbqu6xyvD7N8XxMZZZMj+FZHKqmkRZBRSMYToeogo9R5B8w815fWnLxXQEYhPSUtmozyXm5TW/W2SmpH5E9GIRdyWSgZbZ1hB4JklIZ4zOAOMkqF5JInyShAiIxCvozVRZDRppXyHHXImYzSCo8N3ERGqY1c8jAZbdqUG3e3hNO+Um8AKU3pHHs6zXGmuRwYq0shoy1a1HMyvazb0QvHyWhDtuo5txhLq1Hf/4h8yGj9jtRzYRxLS+xOoZtN0WS0WifqOVduSeFhMlqbYD3nXCpNyGNYQWS0EhfWc67WS6VwIRkt2031XBiGoehfWe9hgFvJaJGeqefcMHRFlxTuI6Mleb6ec/mX1NRJEjJagLT1nMu/pG8l/7lRNxnNWoYL5dmWtKZ7AhiryyKjWcvzWBpLOr6R3NZtAeAxMsoZY6nSjqVvR3WvGuB5Msp5SU7w+77rur1rHV41wMNklJBnSjq/7DneyW//453g8yS3bSZqulQ6Wq/0TP+y864tff+d6enPXzZsKHHdSf2LI6NcYHHUX9iuv6Zz9elFlpSyyCjXW8ynaWVbUr+4pRqujVK/rJbv83kxBVeRUW6xtfSUavxKvnyvnhWTUe7ytqT3XUU9sD0Jlu/VswUySlueOcE/Xc98Lj5wnIxyo6zWmia3ltQz/xsko9yrkZJecvJu4b5QziBoV7yk19bTwVgoGaVp50qqnszJKK07fjVTPXlLRqHrPo2l8YUj9ayYjMK3O5bv1bMFMgq/riqpejZFRuFFpKTq2SYZhaW/llQ9Gyej8MaRNSX1ZCSjsOntWKqeLMgo7JlKqp5skVH4wN1G2CejACF+FxNAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKEyChAiIwChMgoQIiMAoTIKECIjAKE/Acah9PZud4ubwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2db97a30>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mol2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Visualize molecules](http://cheminformist.itmol.com/TEST/wp-content/uploads/2015/04/ipython01.html)\n",
"\n",
"```python\n",
"from rdkit.Chem import Draw\n",
"legends = [m.GetProp('_Name') for m in ms[:10]]\n",
"Draw.MolsToGridImage(ms[:10],molsPerRow=5,subImgSize=(150,150),legends=legends)\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[FindAtomEnvironmentOfRadiusN](https://programtalk.com/python-examples/rdkit.Chem.FindAtomEnvironmentOfRadiusN/)\n",
"\n",
"```python\n",
"env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, root)\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from copy import deepcopy\n",
"def set_mol_indexes(m):\n",
" mol = deepcopy(m)\n",
" [mol.GetAtomWithIdx(idx).SetProp('molAtomMapNumber', str( mol.GetAtomWithIdx( idx ).GetIdx() ))\n",
" for idx in range(mol.GetNumAtoms())]\n",
" return mol\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"fragment_counts = {}\n",
"n_radius = 2\n",
"n_bits = 1024\n",
"AllChem.GetHashedMorganFingerprint(mol1, n_radius, nBits=n_bits, bitInfo=fragment_counts)\n",
"fragments = []"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAACeUlEQVR4nO3awW6CQBRAUWn6/788XZgQgm0lXgJiztlJdDKrmzeM0xjjBsCrvs7eAMC1yShAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKj7GxaWD58/NrTJ6sF990n7OX77A3wUaZpGmP89fH/Hz5dcPtqcCTTKLt5zNz2huoj12Ua5R3NYR1jzIOq1PKeZJSDbDm2z349yBtaeU8yykFWBbxXVRn5ADLKyVYX+qrK5bhiYjfL95h3T/+lNBZu3n5yTaZR9rQq6ctZdMXEhThDASQO9QCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowDJD+H2Wz3uVqSLAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2b0df940>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fragments = []\n",
"for atom_pair in list(fragment_counts.values()):\n",
" atom_id = atom_pair[0][0]\n",
" radius = atom_pair[0][1]\n",
" if radius != 0:\n",
" env = Chem.FindAtomEnvironmentOfRadiusN(mol1, radius, atom_id)\n",
" sub_struct = Chem.PathToSubmol(mol1, env)\n",
" else:\n",
" smi = mol1.GetAtomWithIdx(atom_id).GetSymbol()\n",
" sub_struct = Chem.MolFromSmiles(str(smi))\n",
" sub_struct.GetAtomWithIdx(0).SetProp('molAtomMapNumber', str( atom_id )) # mol.GetAtomWithIdx( atom_id ).GetIdx()\n",
" fragments.append(sub_struct)\n",
"\n",
"fragments[0]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[((8, 0),),\n",
" ((4, 2),),\n",
" ((5, 0),),\n",
" ((4, 1),),\n",
" ((6, 1),),\n",
" ((2, 2), (10, 2)),\n",
" ((1, 0), (4, 0)),\n",
" ((5, 2),),\n",
" ((0, 1),),\n",
" ((7, 0),),\n",
" ((2, 1), (3, 1), (9, 1), (10, 1)),\n",
" ((1, 1),),\n",
" ((1, 2),),\n",
" ((5, 1),),\n",
" ((0, 0), (6, 0)),\n",
" ((3, 2), (9, 2)),\n",
" ((2, 0), (3, 0), (9, 0), (10, 0), (6, 2)),\n",
" ((7, 1),),\n",
" ((8, 1),)]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAGPklEQVR4nO3d25ajNhAFUDsr///L5METQhD3AnTbe/VDt8d41G77uCQh8R2G4QPAVX/lbgBA3cQoQIgYBQgRowAhYhQgRIwChIhRgBAxChAiRgFCxChAiBgFCBGjACFiFCBEjAKEiFGAEDEKECJGAULEKECIGAUIEaMAIWIUIESMAoSIUYAQMQoQIkYBQv7O3QAa9P1+x++HYRhvHL9fu2V6bPpPUCYxys1m4biYlUeOPXUgZKRTz53S7DsVhXKTGolR8psOAvx+VIpSEZ163jOLy9FiAStJqYUY5T3pFFOulsCNdOopiGClRmKUOw3DkA505moMvEOnnpulSbprHAadHmtglFoYxedOiyfeQ9tUo114elmREpKeidH2vbCsSHrSM1NMjbOsCJ4mRlmQzhE5GR7W6NR37eCyIhkKG8Ro144sK5KhsE2nni0yFHaJ0cZZVnRK+tyMt2z80/SW6Red0Klv3yxJj1SXs/OcTh17XJml7vf7iTSqvF+Ix4nRLiymVXrjeEv6zUOtKjBJhyGapPRGp56cLizAL9D0N9Cj75BqlMwKrEnXCtK1cBzvOT1ESdsPMUp+Y01aTpguJunsRyUnPzr1FGEYhgY6+JU3n4vEKK/aDsqikvRXkMIuMcqrdoOytCQ9YmzvL3l/X8WMT/C4sob26cTunFLGSafS5rson1cMeZzd9nT8fnv36Guv58nDe0dwmpn6XtRbZB3cdjoyFDB5vCqfIvIyNkoeB8dAD247Xe+HBA0Qo10oM2WCs0nlzETROTFKTpEkPXKRKHiBGO1aCQVdMAFlKNmZYqJ60w8Dqcr7vObatzG1XcVf//gFomv5jZ62eH5Y+uQs7iq7fRaEp3eRapTSrW07nT00y1yqdPxTZ/E+u2krSVNilAqc3Xb6HQVu8Hzw/LAZyRgkRhtXe4++cOP2JQ08l7O9CsdXyLQ34DWzSIxCyC9YSitLF+1uCvP5f3qOR+nUbxOjcINhqCBi0kHPXC1pjPNGW1b+G7slRW3xd0qlzS6HGO2ReH1I9iRNGyAiX6BTD3fKfoW+tfPDDh5iiukCVUnLTNPnYtvprnjGmyVDy7FbHt5S7pV2ddV+6NRTh3rTf3dZUfyMIgGaV60vTXpTaYyeXYtJjczUt+M7sXGfN5t0lyaDJp1Sr/Svg059I470HF9vFFsW/15NfmA0TzXagiMbUgzD4P1ZLH+aqonRxilC4WlitHENlDlV93MtK+qBsVF41u6205YJ1U6MwuN2t52WnlXTqW9Bwz3Hqnv0dEI12ohiL1gEzfMea0IVe69f4mOA8qlGz0sza3bLtEO9FgHjfa5mxH+TEoEHKZwMpQpi9G5ppKZBML3xaiEpX6AQpphulWbieOlIoFFi9EXyFFqkU3/JtUDUDT+jgYHRBn4FjhCjl6RTTECvdOopVKt1XDMrIxipRm/1m1DanalnycHlA4s95QLXpOvR90OM3m02Nb8YqdP7vP5OK/PtPbua0KlGxq9lBBFi9Lz0XZqe5LR7YO7rmJeQNRvXyyyheU8o5JnnXsZGe5RuZfKy33WHhn9dfpB7WwXXqEY7lbcmPfX/rsXl+CDZPxVSqs6uiNF+ldO733bk2nzTsdE32gQTOvVdK7COa8PiR1MVH1pcIEZ7V0KSBredzt5+OqdTTxG9+7VtpzeUey2jlZOFC2gZj9DL4I9cSXo2PSuwtubCWoxGqUb5Y2Np0GfzxPi1/F285/hoZdWPL5Ch7RKjLJuF4NllRdsPmH0MAW5kiokFacydSr3ISfXVU3X2R4xyGzPm9EmnntN2lxUxp0RtmhjltCPLivolLvujUw8QohplQXpCvrn1fWv7zI4/zsp2z2crxCjLIsuKdh+wwUQ+ctWD2R1ohRIDwhZDc2NayYxTW4yNwsMUnq0To/Cw9HLcStG2iFGAEDEKECJGIW
x2Ve2PvfL64oQnuMMsSXdPfqIhTngCCNGpBwgRowAhYhQgRIwChIhRgBAxChAiRgFCxChAiBgFCBGjACFiFCBEjAKEiFGAEDEKECJGAULEKECIGAUIEaMAIWIUIESMAoSIUYAQMQoQIkYBQsQoQIgYBQgRowAhYhQgRIwChIhRgBAxChAiRgFCxChAiBgFCPkH+5PBFt4rlWEAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2b0dfad0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pprint(list(fragment_counts.values()))\n",
"set_mol_indexes(mol1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAADOUlEQVR4nO3c207iUBiAUTrx/V95zwUJjnJSvpYeZq0rlER/E/jcu6WdxhgnAF71Z+0BAPZNRgESGQVIZBQgkVGAREYBEhkFSGQUIJFRgERGARIZBUhkFCCRUYBERgESGQVIZBQgkVGAREYBEhkFSGQUIJFRgERGARIZBUhkFCCRUYBERgESGQVIZBQgkVGAREYBEhkFSGQUIJFRgERGARIZBUhkFCCRUYDkY+0B+D9M0+fjMdabA+Ynoyxvmr6k89uXsHM29SzsOppjfFmcws7JKEAiowCJjAIkMgqQyCgLuz6h5Ew9x+IDTyzvW0k1lGORUd5COjkum3qAREYBEhkFSGQUIJFRgERGARIZBUhkFCCRUZZ37+6i7jrKIcgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMsrh7t8NzmzyOQUYBEhkFSGQUIJFRgERGARIZBUhkFCCRUYBERgESGQVIZBQgkVGAREZ5q2k6wg1JjvFXMJdpjLH2DPwXzukZY1werD3RK3Y9PAuRURZ3Mz177NE0eb9wg5cFC3rayr3EdC9zsgoZZSk/X7tteZUnoDy13Zcv+/VCerZZq2k6bWwitkhGmVOs4XZiej4Vv4FB2AEZZR4zFnDdPb6A8lsyygxmD98qLRNQXvOx9gDs20Lb8PPPe2fXHAblZVajvOhtxzEtEtk4F4PyivMu/j3/g8c4jXF6cPnl9VOX7zx4CuZiU8/vrHUy/VLSm7/ZlpwVySg/tfqnkR4cMD1HVklZhYzy3OoB/dclptsYB2SUZ7Z5peb1RPcWpA6GsjQZ5a5NLUJ/4mZJv32pqsxORrlhdwGFFckoX+w9oI8/GgVLkFE+bfMw6G8pKW92hLcNwIpcxQSQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpDIKEAiowCJjAIkMgqQyChAIqMAiYwCJDIKkMgoQCKjAImMAiQyCpD8BbB3yxjTmCaGAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2b0ec850>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
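],
"source": [
"# Bit-collision example: environment (6, 2) shares a fingerprint bit with the radius-0 environments of atoms 2, 3, 9 and 10\n",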
"atom_id = 6\n",
"radius = 2\n",
"env = Chem.FindAtomEnvironmentOfRadiusN(mol1, radius, atom_id)\n",
"sub_struct = Chem.PathToSubmol(mol1, env)\n",
"\n",
"# Draw.MolsToGridImage([sub_struct], molsPerRow=1, subImgSize=(150, 150))\n",
"sub_struct"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4QAAAJYCAYAAAA6xSjbAAAchklEQVR4nO3d3ZKbOgJGUUjl/V+ZuTjjhCaA+REg8a1VNVUTH7cbtzFmWwL6YRiGDgAAgDi/nl4AAAAAniEIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAYB/9P3327fcB2b1/c//AY/5/fQCAAB16vuuG4anl4LXmVuxrGzwGCOEAMCsYTB4Q2FL4Wdlg8cIQl6v7/uu9yEDAAD/MGWU1xv+/03kOAoH01IANvkM3CxtNn3fBtA2QUiMcQSKQ4Dt1qJw7jaRCNAOQUgkcQgAAIIQFuJwcLIzgBHn/KCIpeFmZxmFWX3fXz5gIQhh5Gccjm/f/hifnxv/zPhzzucg0CpRSBFzK5IPQfjhM0hxx+w1QQgLpkE3dzvAWy1t68a3b7kPzLKSwKI7RgXHBCFssDcOv52VDwAA5myNwVLhKAhhp7k43PteNOUKAICPvXFXchRREMIJa+/DPadpF4gAAJn2xN0VxxYKQgAA4PXuPFHLVntj8IplF4RwIWfkAwCowyemagnDGmKw6wQhAADQoKORNP2ZpwJxy++7Y9n64ek0BgAA2OiqSLolvioZFRwzQggAADThykgaTym94nfUGINd13W/bvktAEDzHBNNEWsrkpWML/YEVX9wfVqaUnrUZ1m2ThF1YXoAAIADptM+S0wDHYbh8OPUOio4JggBAIBq7Rldm95vGobj2/Y4Op209hjsOkEIAADxarkUw9ieZfoWVOP/diYOt5yhdG/cPRmDXScI4TZ9/991CQEAanP1CVX22BuCW+/7MReHR5/zmRHIWiJcEAIAAF3XPR8ne5ahRLyWnFK65+QzNYT3hyAEAABmnQ2lq5QOqhKjhi0cLzhHEMJNhsG0UQCgLSWnV67Zc+KYq5bho9So4VQtU0SnBCEAAIQ4EyVXheDWx757ZK1kDNc2KjgmCAEAIETJ6/OdHUFr4Rp9H2dPHlNrDHadIAQAgDglpkWeHUGrZYroHnsuX1Hbsi9pPginZ/PZcg2QvbfP/a7aX1jq5DjCTEuv+fj2Lff5/PtjaT2aO8mZdQ6AOVddn6+E2kfW1oK49mUfazoI5/7QR//4304TO33cll5k4HklvgiYi8Olx7R5AmCv0ieQmQvMPSePaWlfu+RU3Ls1G4RLK8mZ+ct7rh0C/NTahvtuJUaH/XkBuMtTU0pbDKqxFpui2SC8w3gHd/ritrqSwhXE4L0+m6K10cItU0sB4JtSo4ZvHBV8i9cH4d6poGNrU0StsBz1tuMIvRe2+/bab/1Cce64w/FjTh//TesbT+u7rrMycc7aWmQNq9tV1+f7PKb9iWe8PgjXzvhjxYNz3nocwJXWonDppDIAUJPS1+c78/Oc9/og/GY6gmjHFbYTg/faOtJnRBCAu7z5+nwpmg3CzzF9Z84yWuoMpbBfxqQY76l5n1FCAHiLvZevsI9Qj2aDsOvmz+JTcsVyUhn4l2miZRyJwunPLJ1UZu1+AHC1b9fnm97nbZYGrmrVD60sKbxISxuJjz0b8BafH/Cd9zYlrK1H1rH3SgjBsZbW5aZHCKFVzX1ztGNZW3peAMA97BvUSxACX20dFdx6XwAA6iAIoUKtxZVRQQCANv16egGAfw3D8Gda6fTESXfZ+nvFIABAuwQhVOwThmN3BKIziQIAHNfS3pEghIuVDrirRw7FIABADkEIF1qLprnraG41nl
JakhgEAMgiCKFhc9NJz4weikEA7vbUsfJwqWHoukbWbWcZhYs8Mdo2fpytZyp1jUHgm/H2pLWzIFOfz7pjvYI69IN3HjxiHFfjb0evfEtOf48YBNas7aTbgeco6xUx+v6/kcLKCUIoqMSo4HTqzFVv0T0fumIQsuzdPmy9L9msV8RpJAhNGYVCSk0RXbvMROkPRjEIjB3ZCR/PdLCtYI71CupmhBAKuOt4wbumlo5/l00EZCix4227wZT1imiNjBAKQjjpqUs1XDm11DeykOOKnW078Fiv4P8aiEJBCDe4I7BKjR6KQchwx861Hfg8d61X1imaIQihrBZ3Lp744Doahz5k4f2ObkfPbB9sW97v7s/nz8ec1YrqCUK4xtNh+NQ00SO2Ti2tYVmB65TYbp55jKe321znyc8PYUj1BCHc585r+bUSg3Pm/k61LitQxtnRva77uV0VhnRdXa+lMITjBCGvdNWHVOsxOFXThzlQXs3h1sp2kn/V/NmxZTBm6T7j27fcB97CdQh5pSs+pN4Wg11X54c5cN7ZHfa927Ej271hGKoOC/7Vwus1DNtGC4Udl5scsvNjhZtbAR9cKQUh1Sr9wXN2Sum3n2nhgxJ4t1Lbobvibjxt/cjv5R6tvT7jUb7xv6f3EYVcprLg+0YQUq3pjsL4tjOPN37MLZH3tlFB4J1KHye415ljkoVhvVr+fPs2/RP4jyCkenMhN739zGMuPa4YBFpQ23GCT0UlZb0pzpeewrdRwumMPyiqohVMENKUK+Jw6XHFIFCzu48TPMrxhW1J+7uvReHSSWWgiLkppQ8RhDTrjjhcIwaBJzx1nOCZ3+P4wvr5O0MuQcgrXBWHS8QgcLcSI4Jnfv4MxxfWy9/155lJIZEg5HWujkMxCNzt6RPGlOL4wrr4W/4lCilqboWq+L3mwvTEKBGHPjyBltS+zap9+QASCEIiHYlDOy4A5dU0ggmQSBASb0scikGgBS3Hle0swDMEIYycuR4hwFNaDkEAnuWkMjBy9HqEAE/xpRUAZxghBAAACPXr6QUAAADgGYIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAANrX9z//N/ffl34u2O+nFwAAAOCUvu+6Yfh+G/8wQggAALRrKfyGIX70bwtBCAAAEMqUUQAAIIMRw38IQgAAIMPc1NLwSDRlFG40d9Kr8b/3nBALAADOEoQAAEC7lk4e4yyjm5gyCjf7bLNsnwAACpmLQjtbmwhCqIwpogAAB3wLwKX/Hh6OghAesDZKOHdNVQAAuIJjCAEAAEIJQqrVv3xobOn45zd6+2sJANAqU0ap1jAMf0JiCJ/b3SqvHwBA3frBnhoNEBZt8XoBALTBCCFN+ISF0Kib1wcAoC2CkKa0GIYJ1xxs6fUAAOAvQUiTxmFYa4R8zqNS6eIVU/NrAADAOkFI02
o88UxSCHZdPX93AIA/EqZoFSIIqdbWkadappEKQQAAWiMIqdbe0b+nwvDJELzzyy8hCADwPi47QROOHKd2dcDUMiJ49XIIQQCgOaaMbiYIeb0rgqbGbUzpZRKCAECzatxZq5QpozRpz4hhyTOS1hxJw1ButNCZQwEAMhghpFlH4uxo0NUcgnOOBl1rzxMAYJYRws0EIc27+vjCVkfL9j7HrfcFAKieINxMEBJtLYTeEkkJzxEA4AdBuJljCHmdo8cXfv79tkhKeI4AtKvVmTjwFoKQ1zkSPCVPPFOj8fNa+v8AcCdfVEIdmg/CzwbkY7whWdrZ3Xv73vvwPMGzzN+DTSbb1n+m3SxNxTFF5z5bX4Pxa7l0/7G1129632/3hxlz8Tc3mwW4R9NBOLezf/bsikf/O3XygQIHzIWG0GvTXByu/Xvpto+5+8JGW2JPGML9fj29AEcthd/RGPz2c8Mw2Ci9gLCHL5ZiYHyhS9rxbbTvzGvtSwI26vv+z77WnkM5xlNJges0G4R3sBF6n8+Hi9cWiHM04GwvOWFvCE753OYoa8x2TU8Z3WLLVNCljZQRwffy2kIBdtCet/U1ODOa51hRDig55dM0UrjW64Nw7dprTjgCcMKWE5RwrS2vgXjjRldGmzCEa7w+CL+ZjiCKRABeQwxyk6ORdmS/y5nEoaxmg/Azp/zMWUZLnaEU4DU+JxRxltH3O/paWxcYOTtad+YahK5fCGU0G4Rd93NDML6tFIH4Tl5TanT2hAmn1urpe2LuLJPeN+1aey23vNYCkAVn9pPGIXdmKqhppHBeP3jnwB9v/RJg6Xm99fkCcJ0z8XV1uPlc48O6sF3TI4QAANzjbMzt3UE/enyh0ULYRxASJ/Ebo6Vnm/VXAOCIUoF1V9yZRgr7CEIAAGaVOk7wqDNnFBWG75b4Bf9VBCEAAD/UdpzgU1FJfQR+eYIQAICu6+4/TvAoxxdmKr1++ZLgP4IQACDcU8cJnvk9ji/MU/r18iXBfwQhcZLf8AAwVmJE8MzPn+H4wgxXj+KZUiwIAQAiPX3CmFIcX/hOd69jya+/IAQACFT7cYJHnRkx5HklR63PnhwpZb0QhMRJeoMDQGm1f4Y6LqxtJUetz0wNTlqPBCEAAIta3Ck2FbQtV09fPhqGKeuRIAQA4B8thuBUy8ue4O7rXR4NvLevR4IQAIAf3j4iwrOevt6l4wt/EoQAAPzwpp1d6lPD2W0dX/iXICTOG964AAAtqfUyJ44v7Lp+aP0ZQEFveFMDANTi7uMEz0rcFzRCCAAAFPX0cYJHJR5fKAiJ0+Ib9bT/b9j+SHv+AMCtapweulXa8YWCEN6u7/8NwLnbAAAeUmNEnb1MRSuDEIIQAAB4RI0hOHV01K/m5zQmCInTypsTAOCtWgjBsTPTSGsnCAEAgNu0MpVyzhvDUBACAACXe1NEtXac4BpBCCOtv6EBAGrzphCcavGsolMuTA8JXHYCALhZ66G0V6vPVxACAADFtBpGpbT2/E0ZBQAAinjDMXVntXZ8oRFCAACAUL+eXgAAAACeIQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUI
IQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAACCUIAQAAAglCAEAAEIJQgAAgFCCEAAAIJQgBAAACCUIAQAAQglCAACAUIIQAAAglCAEAAAIJQgBAABCCUIAAIBQghAAqF7f//e/6W1z/3/tNgB+EoQAAAChBCEA0IRhMOoHUNrvpxcAAKAEsQiwX/UjhP3OrXvf97t/5mkJzxEASlgbJRyGn/8D4LtqRwg/wTPs3KJ/7t/3/e6fvVvCcwQAAOpVXRAejaSp2kOpRMzV/hwB4AqOJQQop7opo8MwXBI6tU2xTHiOAABA3aoIwiMhs/dnhmF49Ni7hOcIAFeZfo86PU5w7ntWE2kAvuuHB+cdHpkeOv6Zo9NL7zz2LuE5AgAAbXokCM9GUonHu8PeKGvxOQIAAO16dIRwiz0hVDo073L1c/z8XOUvNQAAcLPbgrDkaNkVP1siDI9E19FQazV+AQCAelwehEfDpcRi3RVoT8bZnREKAAC8y2VBWMsI1tWjYneOfN75mABQq753BlGAUqo4hvCOoHk6UGt9jp+fq2A1iLC0EzO+fct9AJLZHgKU87vkg9UwWrbk8zv2/M65n6l5iuaR5/i5n1HG+9iRAQCgFkWCcG9MPBkfR8Pwc8H3WoN37Gz8isJrDYMoBACgDkWCsIUQnNobTS2E4NSZMASAGvlCDaCsolNG19Q68lRyZKyWEJwy+lefb6OE/1+VAADgUpcHYa2RNHX2OLoWYsuxgnVZi8Klk8oAAEBJlwVhi+FxZIpla8/z6IlnAACA9ykehG8IjS3R1PrzFIZ1+IwSAgDAE4oF4RvDYumyE+P/1jph+DxRCADAU4pcmL6F4+dKSHieCc8RgDY5wyhAebdedoL6eS3hAkt7sdPbx0PFW84s9O39+u3xABb0k+3NeP9g6cvjvbfP/S77IVRj72duw2677AQAK+bicO3fS7ft+XmAGXMBd3QG0TQsv/0uM5Wowt7P3Mb9enoBAOjWP2TWrk/iAFSgoKUgu+qSXOKP6gR+5gpCuNBLtxtc7ei3kFY4Xs86/gZLo4ZGB+EZpozCBT6fdT7X+GNrrJ2ZkmKFA26ydyro2JHjDHm/b+vUmtNrTvi6JwihICHIoi0niXnx8QnAu6xdo3lv3IlBus704ScJQihACHKaGOQA15ClVtPRnqNnIAWuJwjhBCHILT4Hsged8Yx10xAUhpQyDMPps4yWOkMpPCLwM1cQwgG1heCLt1FZ1q55NHd2s7UPq+n9rSDNW7tW2zgM7Xhz1icKp7eVMnepiat+Fxyy5TP3RfrBu24zH7TUFoJjNS8bcMyRC3a/9bPqrc8L4GlGCGGj2kfhPstW+3IC352ZAjoe3RFQAHwjCOGL1nasxrMcGllkoDs2GrjEFFKgNS478RxBuEP2qpKntRAcG48Wjv8N1OfKbc3SCUIAamM79RxBCBMth+DU3zC0Qwg1KTUauGV7ZQopAGsEIYy8NZzsEEIdSr0Px4/T9/3XbVfrU0jPTCUDYJ2zjO7hbB2vlRRLSc8ValBiNHBLyG2NvZaicO56i60sO0ArBOEegvB1kuMo+bnD1e6cElrT45bw7W9X87IDtEgQ7iEIX8U3zf+xcwXllHw/3bGNqmU76HqLAM8RhHsIQgBmnA3B0l/M7ImlJ8OqxN/NbgzAOYJwD0EIQEFXjtDveew7ZwqUvN7i+PHszgAcIwj3EIQAFHLX6FYto4VXh5vRQoBjBOEeghCAg1oJlpLLWeoMq1t/vpW/MUBNBOEeghCAnVqc0ljDMZHTay1ufbwW/94ATxKEewhCAHaobcRq7/LsnW76UcO00Nr+9gC1EoR7CEIAGrd3BG0trEpfb/Hs48w9rt0cgHWCEAACHRn9+9y/1LTMO6Z3ikKAdYIQANikpuPzzg
QtAH8JwjWjKSxd1/2cLjo3fdSUUgD4x5VTQvc8ptFCgH/9fnoBqiX4AAhyRSxdPTK3dwrr54ylohDgL0EIAOy+vMPWx7zDnt9zxfMEaJkgPGM6pRQAGlbjmUKvMB5ZrH1ZAa4mCM+Ym1IKAEFqHG3bGnqmkAIIQgDghBpjas+0UFNIgXS/nl4AAIDShmH4EXul7gvwNkYIlwzD+mUnAIDqHTnhjJFCIInrEAIAAIQyZRQAiGBKKMC/BCEAEOEzJVQYAvzlGEIAIIYjZQB+MkIIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQKjfTy8Ar9U/vQAAAMA6I4QAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAECo308vwJX6vn96EVYNw/D0IgAAAMFeHYSCCwAAYJkpowAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKEEIQAAQChBCAAAEEoQAgAAhBKEAAAAoQQhAABAKEEIAAAQShACAACEEoQAAAChBCEAAEAoQQgAABBKEAIAAIQShAAAAKH+B8WGheFiotBxAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=900x600 at 0x1CE2B0E6358>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Draw.MolsToGridImage(fragments, molsPerRow=6, subImgSize=(150, 150))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(19, 19)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mfp = np.array(AllChem.GetMorganFingerprintAsBitVect(mol1, 2, 1024))\n",
"len(np.where(mfp)[0]), len(list(fragment_counts.values()))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4QAAAJYCAYAAAA6xSjbAAARx0lEQVR4nO3d7W6jSAJAUTPK+79y7Y+Vdx0P+BsTuOdIVrcJselWppjbVeBpjDFO8HnT1gfAIRmvWIPxijUYr1iD8YqP+2frAwAAAGAbghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAEPWz9QEAABAzTb+fj7HNcQCCEACAL5qmfwfg3DbgKywZBQDgO5bCb4x/zxoCXyEIAQAAogQhAABAlCAEAACIEoQAAABRghAAgO9YunmMu4zCZnzsBAAA3zMXhWIQNiMIAQD4LgEIf4YlowAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAfM80vfY1YBWCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARO0+CKdp+vW4/trS9zyzfe69AB61NGRcbn9kn/Pz8+PW+10/AADm/Gx9AO+Ypuk0xri77dHXeua9Xn0foGmaTqd3h4zr17j1moYnAOARu50hXAqyV2NQ3AFrGuP9mTrDFADwabueIVzbZSiOMX7NIgpIYCvnoejWbOFlfBquAIAlhw/CZ5eCXrq1RNSsIvCs8yzh0tDx6Azi+fsvX+vyNa9f/xPLVQE+ZTqdTktD0q2vAes4fBDORds5EkUd8G23onBumxvCAABr2u01hJ9yfedQdxAF/qpHhyfDGADwqN0G4fU1fWfPBN0Y49fjvA1gTZ+4wQwAwCfsesnoXBR+MujcVAZYyytReP09SzeVubUfAMClaSgb1mH+gzUYr1iD8Yo1GK8W3LqHg/s73GW84uN2u2QUAACA9whCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEACAzUyTj9aDLf1sfQAAAHScP3j+HIJjjF+/B75LEAIA8DVz8TcXicB3CEIAAFb3SOwJQ/g+QQgAwGpeibvLMBSFsC5BCADAKt4NOtcXwvoEIQAAH/XJiLOMFNYlCAEA+Ig1o80yUliHIAQA4C3fnL377zLS8+9Xfzs4PEEIAMDLtpixO7+dMIT3CUIAAJ72F67pE4bwvn+2PgAAAPZjmqb/zQr+lWv5xvjv4xyGtyztc7n9kX3Oz88P2CszhAAA3PUXZgTvuYzCW4c5Te/PJl6/xideE7YgCAEAWLSHELz0yDLSczi+80fayV8H3CUIAQCYteePeLgMw7X/CGYH2TNBCADAL3ubFbxl6Y9wb5bw0esCxSB7JwgBADidTscKwUfcisK5bXM3lYn8VXFgghAAIK4Wgp8gBjkKQQgAECUEH/+4CjgqQQgAELTnG8Z82qtReP09/jrZo2kYCViHf2tjDcYr1mC8Yg3GK9ZgvOLj/tn6AAAAANiGIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShA
AAAFGCEAAAIEoQAgAARP1sfQAAT5i2PgAOaWx9AByS8QrYBTOEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAg6mfrA3jXNE2/no8xfn3t8vmr25/dBwBWN02n09z56Hr75Xlyaf9Lt85x1/ve2x+AP2/XQTgXZ68G23VYPvt1APhz5uLw1vOlbWdz+wKwa7tdMroUfq/G4L3vG2OYGQRgX+7N9s19fYzHQu9WOAKwG7sNwm8wKwjAYbwacM6FAIe26yWjj3hkKejSzJ8ZQQD+tEdj7Z3ZvEeuUwRgtw4fhEs3jzn/KvoA2K1HbhIj3gC44fBBeM/1DKJIBOAwxCAAd+z2GsIxxuxy0Geu+zvfKObyhjFiEICEpZvH3ItIkQlwKLueIZyLwk8GndlCAHbv1ucMzkXhMx9DAcDuTUPxsA63pQP2wnmQNTgPAruw2yWjAAAAvEcQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIOrndDpNWx8EwIPG1gfAITkPsgbjFbALZggBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAEDUz9YHAHB403Q6jXF/+zT9//
dL+1+a22dp/3v7AlyYrsabcTGGTNP06/mr2+fe69Z+8FXPnnN3TBAC/AVzcXjr+dK2Z74fYMZcwN2Luluv9cx7vfo+8FHPnnN3zpJRgL/g1klm6SQ0xr//BRPgDUtB9moM3vs+8cefEzznCkKAv+bVf4U86IkKOJalWUOzg7ANS0YBvuHRWHtnSYr/kQK+5NmloJdeuc6Q47v3M3XL2z858Z89QQjwDY/cJObA1ycAx7IUdedfn4k7McjpZPnwlgQhwF8gBoEDuZ7tefUOpMD6XEMI8NctXcguIoEPG2PMLt17ZjnfGOPX47wNdiF4zjVDCPBX3PrMo7kT1K1bYl/vf9CTGPB5c1H4yaCb+6iJtd4LXvLIOfdAJv/RATtiwGINbs/KGoxXwC5YMgoAABBlySgAALApHzuxHUEIAABsymVs27FkFAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAECUIAQAAIgShAAAAFGCEAAAIEoQAgAARAlCAACAKEEIAAAQJQgBAACiBCEAAEDUz+l0GlsfBIc0bX0AHJKfK2AvjFfALpghBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUYIQAAAgShACAABECUIAAIAoQQgAABAlCAEAAKIEIQAAQJQgBAAAiBKEAAAAUT9bH8Capmna+hBuGmNsfQgAAEDYoYNQcAEAACyzZBQAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAA
BRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACBKEAIAAEQJQgAAgChBCAAAECUIAQAAogQhAABAlCAEAACIEoQAAABRghAAACDqP49KsPbPjB1XAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=900x600 at 0x1CE2B0E65F8>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# When fragments are round-tripped through `Chem.MolToSmiles` and an ordinary (sanitizing) `Chem.MolFromSmiles`, some of them do not show up in the grid\n",
"Draw.MolsToGridImage([Chem.MolFromSmiles(Chem.MolToSmiles(Chem.AddHs(mol))) for mol in fragments], \n",
" molsPerRow=6,\n",
" subImgSize=(150, 150))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAADdUlEQVR4nO3dwVLiQBiFUTLl+79yz2KqKERMQu4k/Xdzzk7UkgV+3kBTLq21GwBH/el9BwDGJqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQg8tX7DtDTsiyPH7bWXt5+u7WHr7l/7/evaN8+XJbtW2AOS/PQ5gT/Ivv44JJRZuWinrO09rxYYUoyChCRUU5kkPIJZJRzKSnTk1GAiANPn+jHeaZNe19if/la/GSDdFl+Pd+y8ikmJqOf6Ppf9WlKqqH85KKeU/zsicIwKxmFvUxRXpJR2EsoeUlGIWWKfjgZBYjIKGxbOSJmiiKjsEEoWSej1DLW8VKF5Saj9DJKLh1yYpOM0sdv72uq9n4noWSTjFLOEOEyRbmTUbqpNjzhGBmF1xxyYicZpaeVQdp3qAol+8koRZW95FdYnsgonRXMpUNOvEVG6a/a4Seh5C0ySmmtHfiXJ2cxRXlJRimh4KU97CSjVNdau2yQOuTEATJKFSuD9Jp+CSXHyCiFlHom9JHCskJGGcaphRVKDpNRall5JvTUJ0lXGqqwrPvqfQfgbU8xvTfuf93+9LM0lHUeIlRU531EMsomF/UMRkOpRkap6MqzohCSUYrqXlJTlJ1kFCAio9TVcZCaouwnowARf3Kprssg9XvBfo7fU5qLa+pzUQ8QkVHqMkUZgowCRGSUokxRRiGjVKShDERGASIySjmmKGORUYCIjFKLKcpwZJRCNJQRyShAREapwhRlUDIKEJFRSjBFGZeM0p+GMjQZBYjIKJ2ZooxORulJQ5mAjAJEZJRuTFHmIKMAERmlD1OUacgoHWgoM5FRgIiMcjVTlMnIKEBERrmUKcp8ZJTraChTklGAiIxyEVOUWckoQERGuYIpysRklCtoKBOTUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChD5C1pGGizUmq6DAAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2b0ec210>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fragment must be turned into smiles, disabling sanitazation, because the number of electrons sometimes too many/few.\n",
"## Saw an issue: https://github.com/rdkit/rdkit/issues/46#issue-15275837\n",
"Chem.MolFromSmiles('ccc(C)nc', sanitize=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# nRadius=3"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[((8, 0),),\n",
" ((4, 2),),\n",
" ((5, 0),),\n",
" ((4, 1),),\n",
" ((6, 1),),\n",
" ((2, 2), (10, 2)),\n",
" ((1, 0), (4, 0)),\n",
" ((5, 2),),\n",
" ((0, 1),),\n",
" ((7, 0),),\n",
" ((2, 1), (3, 1), (9, 1), (10, 1)),\n",
" ((1, 1),),\n",
" ((1, 2),),\n",
" ((5, 1),),\n",
" ((0, 0), (6, 0)),\n",
" ((3, 2), (9, 2)),\n",
" ((2, 0), (3, 0), (9, 0), (10, 0), (6, 2)),\n",
" ((7, 1),),\n",
" ((8, 1),)]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(fragment_counts.values())"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAACCElEQVR4nO3UwQ3AIBDAsNL9dz52IA+EZE+QV9bMfACc+m8HALzNRgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAxEYBEhsFSGwUILFRgMRGARIbBUhsFCCxUYDERgESGwVIbBQgsVGAZAMTDQQpVReZYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x1ce2b0ecd00>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env = Chem.FindAtomEnvironmentOfRadiusN(mol1, 0, 8)\n",
"print(list(env))\n",
"Chem.PathToSubmol(mol1, env)# Returns None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compromising Measure"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"RDKit ERROR: [16:10:39] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: no atoms\n",
"RDKit ERROR: Violation occurred on line 157 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\ROMol.cpp\n",
"RDKit ERROR: Failed Expression: getNumAtoms() > 0\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n",
"RDKit ERROR: Violation occurred on line 162 in file C:\\Users\\glandrum\\Anaconda3\\conda-bld\\rdkit_1533270029125\\work\\Code\\GraphMol\\Atom.cpp\n",
"RDKit ERROR: Failed Expression: d_explicitValence > -1\n",
"RDKit ERROR: ****\n",
"RDKit ERROR: \n",
"RDKit ERROR: [16:10:40] \n",
"RDKit ERROR: \n",
"RDKit ERROR: ****\n",
"RDKit ERROR: Pre-condition Violation\n",
"RDKit ERROR: getExplicitValence() called without call to calcExplicitValence()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4QAAAJYCAYAAAA6xSjbAAAlkklEQVR4nO3d25KjuBIFUDMx///LnIceTlE0YC4CpZRrRVTMlLtsYwxCmxRiGMdx/AAAAJDOP7UXAAAAgDoEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAgKQEQgAAXjEM3x8/8jfT79MPcJ1ACADAa0oEuGH4fMbx50cohOsEQgAAXlMiwI1jmWUBPp9/ay8APG3476gzOnoAQHemaiHvG4KXZvX9jhEI6d7UGMwbLQ0EANQzVQm3DsdHc4YwWM8wDOH7Uy0sYwQCIWnMGwThEADq2guFa4+tTSrjEA73CYSkJBwCQLuEwbpaqbyN49jMstYkEJLeejgcHWgA4GFmCIX6zDIKM+M4/vdz/f5Ga8/5dn8lB0MAsrp6AnZ+nHYcfU9rFbepSsg2FULYMG/r5u1IQ20gAISydQydP37nb4DzBEI44Gw4/DZ7GgBAa1qrDk5cS7hPIIST1sLh2fbFyAUAoDe1h2YKfNcIhHDDXrtzZjptAREAiOxIhS1yIFMl3CYQAgC7lmf9daggl16ClFC4TiCEB5lOG2jJ1nCvZedJhwqgHwIhACRzNPhtcZYd8uhtX9d+/U0ghMKW7cu337ceA7jrbvADoH/D6KgAAF14+1o/Z9mhbz3v4z1/trP+qb0AAEA54zj+/6ektWrjNPQK7rAJQV0CIQB04Omz3VvhTyiEPvVeQdN2/RAIAYBDhEKgJ9quPwRCAGjc1pl8HR3git6rg/wmEMJL9MuAHqgSQt+yhUFtl0AIAF16slMnFAL0QyAEgIbVOpsvFEJ/slUHJ9nbLYEQXjKOho0C76jdqcveuQJoiUAIAI0S/P5YLsL0+9bjwN9qtye1RWnPahAIAaAjb3fqogwdTdqPA7hNIAQAbokQCreG5Q/Dzw+wLnt1cJK1Sth8IByG4dfP8t+2nnPm8bX3gitcR5jT1nc+f/zI30y/f+vczv9GR7hfrXTganewxvHnB/hbK23JW2q3WV99O8Af7VDM/Fto0apY24CvbtRHwuD8de08wBnDcL9DunyNvdfUPOVU89g0daLW3n/v347aOkzPX9JJN6Brawf+Ah2MZgPh3kHn6muFPhsANG3qqN5ps4U8orsb/H6Ow2vH96PLIBTCUQoc60qcxCpuqxNRoIPRbCB8w3xDWAbGUBsIkMrUFO1VC+cdYs1Vf0qOkCltqyO19vjyROzV5V8+bfp963EAfnQfCM8OBZ3bGyIa5cBLe0pUimjTt+/+aFVjev78tZbD5pava3sjAidXWTd81qrCPEtfdl/IKuFRJ4dJdB8I177E6WDU7JcMjbGv/dgLhWuPGfrGUgv707eOVPTlB2jayQ5F87OM3rWcOdR1hFBWC53XVhxtnjRj+UTcz7ZOyEZbTsjIvnhMljlGmg2EW1/QmS9tHMdfP9Nj8Lz+G5fPxwFni0kv4H32OTguQwiaNPNZ9262mnWW0c9nPRSW7HyaVAauEwb3XQmFy+dsTSqz93e0LfJkMt/UXk7Xb8MfR/bFpq+fO+HMurji9tpbmxlruSwFvqOmA+Hnsx/Mzl67sDYj2tH3An5kOIicceRegUfvJ3j37yAzoRDyhL1Swq2rb8tytKMw0+yQUWhZz2PSwzWc0AnVwTIM2YZjeu6rfD6x2qXamq8QAjFMBw2NK91Yu7nj/J4fk4e3+Z47ZEAdqoTHZFlHAi
Fw29EGM0vDSuf2guLtl/47/O3dA9f+dI2ho+/S9rer1+DY42e6QyAEbhEG6doT1bm1GbL/+2+P+0jUff9PKIy5bL0wciS2XsNeKZnWjUAIXCYM0r1lJfCstees7AtH9w4duLKsz+dYr/3obT/p6bOUYlIZeFiv1/+caVA1vKQ1jn//3H7JdiZ60PHKy/fehpbakzdla7sEQnjQXoPSciOcraGEX6YL0KafCvtCy+1HNNZlOdZjm47sA73sJ0f6Lxn7OAIhcMqZYaLQtL2bOxas+PWqpU5VL53dWoZhaOr75prW9xPb6DaBEB7SY3ByzSDE0XrnLBrr87x5ELzS5s+fP/0/ddj+/8jafzGpDDzkzJCEeSMctSESBiGeqJM9RFwmyrv6Ha/NPro8Htp+Yora5nzT4jK/SSCEgq6GpuV9xuYiNGDCIMTVagctIuvyuzvr50jYEwzryb79Z/7sAiEUUio0Lf+tdvXQbKJAJtqxdXcC2pXnzoOh7ySW1oJjS8tai0AIBTxZQduqHj7duB09gGtoob5IHbQoy0EZdyt1d7eH+bVttqvnRWpL3pTxM88NY+ZPDwXUGk755NBSQ0ShTW/sk9/aHu0Cn88zQz4Fw/f0cnuGXj7H01QI4aZawempoaXCILSr5Nn9rRkH915bu9CHp68TvMr1hfAMgZCmtHgQOHp26vO597lKDC0VBiGXvWnm7eP5vH2d4PS8q5dSOBY958jJpejDS1UHjzNklCbVDoZPBacnrhE8OrRUowj92NqfIww1J57a1wnWCKJ81/JJ4paXvQaBkG68NeHK243Mk9ceOssK/YpwPSF9Kx3GBMN4Wq2ytbrctQiEdOmpA0PtCWSe+DxHX1fDCe2peT0h/YocvhyrymstXLW2vBG4hpAuPbGT1xx+8NSF9Ec/T8n3BOIS/Njz9vHg6vWFjltwjgohYT01FKXka669/tu71JPv6wwatG9tPxb8OKNmwDKMNIZWqm6tLGc0KoSEtayKzR+783rz1yzVaNRsXEqvp7XXBto1r5jMH4Mjanee71zr7jYVcIwKIU15qsp3Z6KV2gfLbxwIATirx2NH9ON1dNGrb9GXLzIVQppS4l57R1+3hzD4+ThDCsBxrRwrXF8I5agQ0oW3bjmxfE+7DwA9aDEoub7wfVGrcFGXqxUCId15IxxqVADoQQ/B6M4xuYfP/7YW+0AtLvObDBmlO08NK52/pkYFgNb1cjwrMeFcL+sCrlAhJI0S4dABAwBic6x+XkvruKVlrUWFkDTuVg41KAAQn8lj4BwVQtI7Eg6FQQBoj+P3c1pYty0sYwQqhKT3rXKoMQGANjl+w3cqhLDBcBMAgG3zE+kR6cMdIxACAAAk9U/tBQAAAKAOgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACApgRAAACCpf2
svQHjD8Pv3cayzHAAAAIUJhHuG4e8AuPYYAABAgwwZ3bIV/Mbx76ohAABAgwRCAACApARCAACApARCAACApARCAOAvw/D3JfPz39cup3eJPUB7BMItW5PHmGUUAADohNtO7FkLhcIgAAcN/x1DxkaPHdNhsNHFB2qb96M1JGEJhN/YeAG4aAqCwzA0Gwr3GCIKbFqeTXJ2KSxDRoHHDXqNJNdyGNy7/e44/v4BoD0CIfCoXisjcIeTJABEIRACjxiGQRgkjbMBbxzH/+8jLdirEvamle8EaE/Udt81hEBxgiDZTAFv+v+jz/l87C9RtD4BEISzPJOUdN+aB8Co7f4wRloaoHnRGjl4m2DRFt8XUNpaCFz7myjtjkAIL4i00z9Fpwp+u7pPZGgvItBmASUdCYFbz6ndDgmE8KAoO/rTdGBh25V2oFbbkWFW+CztMvC8KyFw63VqtkkCITyk9s79lqOfM8v6gC1X9oG39pupT9P7LqodAu66GgK/tT812yeBcM/eqdIMp1F5Ratnq88st04Y/BFtf88UBD+fOOsdutdZP/mtSmCt/pJZRqGSZQelpQ7LmQZLGIQfd/f3UvuTIAi8rrGQWCoEzn2bkXr697fbLIEQCrozfHLZUZw/FsnZhiriZ4DargbDK7e3mKsZBN/sCwqCwFVPtx/fbj1xt52/QiCkOREP9CWHT87/LVo4dL0glHUlGF65j1WEiuD8lmRPLUfE4wOk01glcM0bbcheNfDt+xUKhDQn0k09zwbBo387WQuHNT7z0ffWGYNr7gTDbyK0lZNpMUr3F7U9EFwHIfEJ34aIvjWE1KQye0wqQyEld+a3q4aqgvC+EvtT9JBUqlqo7YFg1vrIjfSb19qTVwJZ5clmVAjpRrThlZPSO/GbVcMzyx5pnUPr7lxDEj0ITn6qhdfayFY+J8A3tSebEQjpxltB6UzF7Mgy3NnBn5qIRkcL6rsyjLTFatnZ8Kt9gsAaqQRGVHOyGYGQsO5s9E8FwaOvfbaydncHLxmG3VICYjkSDFsPSRk+I6TVcEis0c+pMdmMQEhYd+/XNXe3gvZ0SCr5We9UDUtXP4Fy1tqJ3vbFDJ8R4Ju3J5sRCAmvxLDIuxW0t0LS1lnxu1XSb+uu9yFp0JNIMy0/Yf65tv4fiGn4fD6t7qXR2pg3rysUCGnGmYBz9HVKebIReXpIqRvNv2e+3X4+f38vZ04IHPneoh3c4CzbL4cs2tbDM1w2PJSxJS2HxNreOiEmENKk0hPIrAXMSLdbKNkglByeynElp7JeBsuz/w7QjYZvcQBHmWUUvqg1pLRGoCoVKKbXOhIcVJnu+3Zx+JXX2vvu1vYJgO5sBb9xFAqDaLkPEW3ZS/YBlwRCulGqahilKnjEkxMuqCC2Jco2SVm+V6AV2qt2CYQ79sY8Gw8dW4mq4ZZoDd4TQ0CjfcYsjgwF3ZtxDIANRkywI3u/RyCka6Xvz3fn+U9bG0qw9viV1+IdezPAZj9YAdyyNakMIBCSxxv354vE5DH9WVYQW9wuAcjBMaodAiHpnL19ResN2t6yPz1rFX9srecz6/7Ji8kBmrQ1eYwJZbgp+jG29PIJhKT27f58y7+Bq9ZmBi25bUU/eHGN75SI7s5gfGurXu4TUyjc+xte8eREdzxLIIT/CII87Vu19s7jpW5rAfBNuLbl2/Js/Xu0z9GorXs5t8IJVYEQ/pK9UQAA2PPtkpt5MNSvik8gBICgMnamtj5trrUA8VyZlM9cBW0QCAEAgL+UuJ9za9cWRg+wTyyfQAjJxW3yAIC3lQiBS4aQxiYQAgBAYk+EwDV/qoVx5vMRUP8QCAEAIJm3QuDS1u0jo8gYEAVCAAgqY8cEeFaE6/kihMKWbo3xtH9qL0BLbDh0ae2mvgBAl8ZxDHGyqVb3YxiG/w8VnU9404KnhriqEO5Yu1F5hLMqAADQujcqhb/z3t+Byq0xBMJda+FvLSQCwBOyd1KA/j0RCuch8Pfrrr
9J9qKPQLjiyAZh+lwAALivdFf6yutF6tu/HU4FwpkrKz/7GQUAAChlWS2cft96vLRaQ0hrzfr6+QiE/3fnizeMFAAAyqg9A+mbofBofnhyedIHwpIhTjCkWRHmfwYA+Gx3S96cEPTJUFizGrgmbSB8MrT9BEP9awCui9BRAIhiOWT0+fcrd2nYnRD49K0x0gXCN6t38/urOKYDwAFbB0wHUkglym2S7042czV7vFlFTBUIa1wgOr2dYAgAAMdFCYWfz7khpFfDXK2hpMOYYDxKpGv6DCMF4Ki9zkeEqdEfsez99fgZgWZ9a5c/nzZC4FzXFcJIQXByZhjpVnicP37kb6bf58sAAOFszSLhwAUEsVcpPJo5IoTAuS4DYcQgOHdmGGmJ4+Bb922hUXtn43XOAAB+uTLZTLQQONdVIIweBJeOBMMSdwNoZHVQg8AHobVyPAPI5shkM5FD4Fw3gbDlaym+Df8sSV8fAADKWA4hbSUEzjUfCFurCu7Zm2l7L8gdnX1JGOSSKNN7AQAEVPJ+hTU0GwhbXulX7IXCrUlllr8nWVWUtjakFKguy/EPoAUtt8nNBcJsQbAEYRCAJqzddMwBDOBRzQRCQTDWzTkB4BGJj/MANTQRCFueMKa0q6HQCVdWORsPAJDaMEpaAAAAKf1TewEAAACoQyAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEAABISiAEALYNw++ftX/feh4A4f1bewEAgKCG4fMZx++PAdAsFUIA4G9bwW8cVf8AOiIQAgAAJGXIKABwj4ohQLMEQgDgnrWhpUIiQBMMGQUAAEhKIAQA/rY1eYxZRgG6YsgoALBuLRQKg10YhuEznvguh/+2gzPPAdowjPZsAIAU7ga7s0ESiE+FEACgc6UqfMIg9Mc1hAAAnRvH8ZEwN5hNFpqnQggF1DogOlMLwJYrwzvPPmccx+auLzTsFX4TCOGmmgcWBzUAlq4EtPlzzj5/+rvox6Q7nxF6JhACAHTgbhCczAPemdeLGq5KfkbokVlG4YYIZ0MjLAMAbTkThEoFzbc9/RmhFyqEAACNeuN+gleqabUrcFeuhZyeN/8dMlAhhIu2DjZPTzCz9Z52ZYA8rlbtShwr3pis5qpSgc5xlUxUCKEwBxCgW8Pw+WjjqoowfPPOxDNPifAZoVUqhHBBxDOHEZcJ6IxA2JQ3Ak3tgPrG+wuG8W01TfPHj/xNViqEAACBvXGd4FU1ry+8s16GYTj8fNcXtkGwu04ghJOiVuKmA1zEZQPgvJYqWXeD4RuBd+u2E1fut3jl/XnWOAqFVwmEcEL0wCUU5uF7hv61EASX7gTDb54IyFfWmWBIbwRCgIbogABzUU8OXa3+rXmzUnq1YhjxO8
joW5Xw4YngmyUQwkGtNPiqhP3yvQKTVk4O3Z2t88pw0jvr5OoMqq18HxnshcKtSWWyEwgBGqGjAbQYPK4MsSxxneBVri8kG4EQDmitMqNK2A/fI/D59BE0joSmq7eReGK9uL6wXVOVkGMEQoCA3p6OHYipx2CxFppq38/w6PupGLZDKDzOjenhi5Y71i0ve1YlguD0fB0RijOn+6uytOFXPmetdXO1Xc3yXdKmdgPh1kFp+fj81MCRK0n3VsfaaYZGVx/H9NCA9/AZ+K70ECxYJRDygBaPUy0uM2zpe8joWjjc+33rscna3wLcdKdjceY+W4Ih0KpoASzSssBd/9RegEd9q/ZtzUd7JOg5S9q9aAefq+ZDB9+yfLvp963Hp//Pdo5lGIbL29n8uWeuaamxPQBcdaedfJN2lZb1XSGcuxrgBL9y9iq034b20pwzu85yU+h9E6g9YYzrC4EWtBAEJ9rVd2XoK7yp7UB49GzMna3myHWK3HNkKO/LWjoIHVHjNhRbN4Z1KW6ZCWNKLYMODGcMn8/HlsJbWmuXWlveFk19CKu6rLYD4ZFJYgKEC2YMqUhva86n6fded9enrxO8SjAEIDpB8FltB8Jveu5dtmovDQTRW3VwUrNKeESvu+udoPXm9ObzYNjj9g9AewTBd/QdCPfsjWe7MhkNXWuxk/zkMm+FvKvzNC0nnWlsVa+qeZ3gnWtZXAcDQG099Qda0H8g3LvP4FpP9WxA5L7l93BkfZ+sLp7567VOcI3q2h1by3p+hsn1z/ttNSz/ffr96OM9qD089M5QUMNIAaghWhDMEgPavTE9PGQrTPUQCFv6DNlEDl+2Gya2BZ5guyJaEJyLvGyl9F8hhEJaqRK2sIz8eDsIXr2+MHJgBaBd0fstGW6LJRDCQivB76weP1PLagWsq+HOMFLgKVqTnFo7nsyvcGpkkQ8TCOGE6GEx8rLxo/b3dGdGUcGwb7W3TaB/LR8/5tXC+e+tcw0hbNjrGEXsNLW2vBm1fBDcYtvqw9Ft0/fNI3oei8cvvbUhvXweFULoXC+NVctaCYKuL8xJGwE8rdfjRC/HQIEQNuwND402dDTSsvCjtYOE6wtz8n0BT8lwXOjhGCgQwo5owe+slpe9ZS0fFFxfmIO2AXhSxuNAy8dAgRAuihIWIywDP3r5Pu58hjuhkme12FEB2pK97W8xGJpUBg6IOmFL1OWiP7antt3tmMyf31Inh0aYVAaqUiEE4CtBoG0lguDytWwPAH1QIYSDolXjoi0Pedi+2nDnezoT9gRDblMhhKpUCOGgKNcMftPCMtI221dsdwLa3VlmbRsA7REIoYC3w6KOF7BU4jrBu5MJqRYCtMeQUTip9lDN2u8P9OWJECcYcooho1CVCiEANOqt6wTPMvEMQDtUCOGCWlU61UHg83n/OsHpebWGowLwHBVC6IDOFuRQ8zrBO9cIur4QIC4VQrjo7Wqd6iBwVekwVqNCSUf+2wb+b74trF1P6BpDeJRACDdECGIRlgGIKXL40nYl9S3wCYTwOkNGAaAzbwfBK+HOMFKAGARCuKH2zeqdYQfmagWsq+HObKSsWg4pBR4lEAJAB2qfIJqHuyvVwum5899Jam3IKPAYgRBump8Zr/HeQG7RQtSd5bgTKgG4RiCEAnRcgLdFC4JbXF8IEJtACAANaS0oub6QX8Zx/7YTwOvcdgIAGtBDMLozFLSHzw8QkUAIAMG5pu6HdQFQ1j+1FwAA2NdrALoyIVev6wKgFoEQAKhiur6w1kzNAAiEAEBF4zhWvX0PQHYCIQBQnaGgAHUIhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAE
kJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAAAEkJhAAUNQx/fpaPrf3/1r+vvQYAUN6/tRcAACbD8PmM4/bvAEBZKoQAFDeO1yp8wh8AvEuFEIDXGQ4KADEIhAA8YqoSrlX9lo9tXXOoYggAzxIIAQjFdYMA8B7XEALwmLPXEgqDAPAuFUIAQlkGSAERAJ4zjKNDLQAAQEYqhOQ1L0M4LwIAQEICITm5+zUAAJhUBgAAICuBEAAAICmBEAAAICnXEJLT8uZorh8EACAhgZC8Og6Bw39h111lqM18TQAQm0AIHRgWd/KeguAwDEIhAACbBELy6aRkMQ+BW6FvHEehEACATQIhNBQQj4TAJaEQAIAtAiG5NBT+Pp/toaBnCYXU0NjuBgApCYTkFrDHeqUKuPYay+cKhQAALAmEEETJmUG3wp9QCADAnBvTQyAlg9oU/o4+/ohh+PmZP7b8GwAAqhAIySPg8NCnVQ2F0/qefgQ/AIBwBELyChQQ14ZxlgpsISqFS2uVQwAAXicQkkbm6BEuFM4rh3Qp0PkWAGBHu5PKbPU2lo/PO7tbfz/3rQfz7fUIabUC9/l8on6DT0z88vhEM/YNqGLv9jRb+/bZx9fey+RUhHG2Lwf80m4gPGItHO79vvXYmefThKwzbRYNhd8OwMvrBhOt75IzxsKereHmV7a9b6MFlq+btR0lmLN9OeAvfQfCvcZgq7GYOrEaEl7ydqfqTigchuGnqnpkmbf2sW9/06C7lROda87a2mbubH97odD2STj6clBE34Fw7mrDoEHhZW8Eg6OhcG8oGr/dXTf93SMy8qBsjro6tBSAdrQdCI9OhnEn1DngNU/HZd23UDj9P397av30FwqJ4OxQ0DlhkDV3JyO7tfXY9qC4tgPhkUliVPhY0Jn5sRdArKPf3ppMw3qntK1QN/33zDan/eTz0U5Bb9oOhN8Ig3xiV7widK7WhonWXqaI7q6TqNsgLKs9hokC5NJ3INyzdcGxENmFrWvfondoai1f5HVSS6kAt3ydM68bfXulrq0K/5ntptQMpVCFvhwU0X8g3Jsafzkt/vLfp+dPjyWeRj+6o5OfRK4WUl+pYaF7r3Pm5ITtlW/WZgYtua2s3WriqfeCS4705WjC3WtTn9ZzezeMPX86ulWiUxLhTHi0+3rVfv+WXQmTZ9Z3S99NS8sKANn1XyGkC0+cla49o2O0MJhRiXV+p4J3pgJo2wAAniAQEtYbszr+6ZAbXZLN3WGYJbfNVq5vBZi47QT0RSAkrLc6x1vXpNOvu0HwiW3zTMXadYVATdoe6ItACJ/6oTBCdSjCMkT01v0Hp9c/EvauzFgKALBGIIT/vBkKha/4aoWts7OQHv3bt0SfJQ4A+E0ghJk3QuFahzlKZz67N6uB35wZQlp7WT+f9fstRlguAGCf207Aw34yRtwOc6RleVukELjmbKXyze/y27ozpBUA4hMIYcOyUjj9vvX48rmTvT0sShCLshxvai2sRLln4dP3WwQA3mXIKOy4Onz06HNq3wsxo9aC4OTsENLS21WJ+y22ts4BIAMVQt
iwrAjO/7t0dy+q2VnWUW/LmWB297stPZy21TAOAD1TIYQLlkNG77+eCgrHXJmF9KynglvE62cBILt/ai8ARLdVFSz/PuOviswThmFwW4BOlN5epm1jCmtXAtvR7euNbR0AOEaFEA54OxSWqp4sO92qMn05eiP7PSWqgfPXmIfKPSWWHQC4zzWEENDVUHg2ABq614+zs5BOnpyN1CykABCfQAhBHekk36kAqs70Z2+bKRUCnwqTQiEA1CEQQmBrneSrHXLDR3NYBv1Swf+NEwhCIQC8TyCE4N4aPkpfIlWArwxnjbDcAJCBQAgNeHr4KEyeHBJ65jVVCwHgHQIhNGLZQRYAKemtytyZ9xEKAeB5AiE05I3ZISESQ0gB4FkCIUAyLZ5YUC0EgGe4MT1AEhGrbUeD3nQj+0jLDgA9UCEEoKqz1xUe/VsA4DuBEIAQzt6ewuELAO77p/YCAMDnc67qNw0hBQDuEQgBaJIKIQDcJxACEI7qHwC8QyAEIJxpSKhgCADPctsJAEIyJBQAnqdCCAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkJRACAAAkNT/AM3DqcC5vGpHAAAAAElFTkSuQmCC\n",
"text/plain": [
"<PIL.Image.Image image mode=RGBA size=900x600 at 0x1CE2B0F8080>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fragment_counts = {}\n",
"n_radius = 4\n",
"n_bits = 1024\n",
"AllChem.GetHashedMorganFingerprint(mol1, n_radius, nBits=n_bits, bitInfo=fragment_counts)\n",
"fragments = []\n",
"\n",
"fragments = []\n",
"for atom_pair in list(fragment_counts.values()):\n",
" atom_id = atom_pair[0][0]\n",
" radius = atom_pair[0][1]\n",
" if radius != 0:\n",
" env = Chem.FindAtomEnvironmentOfRadiusN(mol1, radius, atom_id)\n",
" amap = {}\n",
" sub_struct = Chem.PathToSubmol(mol1, env, atomMap=amap)\n",
" smi = Chem.MolToSmiles(sub_struct, rootedAtAtom=amap[atom_id], canonical=False)\n",
" sub_struct = Chem.MolFromSmiles(smi, sanitize=False)\n",
" else:\n",
" smi = mol1.GetAtomWithIdx(atom_id).GetSymbol()\n",
" sub_struct = Chem.MolFromSmiles(str(smi))\n",
" sub_struct.GetAtomWithIdx(0).SetProp('molAtomMapNumber', str( atom_id )) # mol.GetAtomWithIdx( atom_id ).GetIdx()\n",
" \n",
" fragments.append(sub_struct)\n",
"Draw.MolsToGridImage(fragments,molsPerRow=6, subImgSize=(150, 150))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```python\n",
">> list(fragment_counts.values())\n",
"\n",
"[((8, 0),),\n",
" ((4, 2),),\n",
" ((5, 0),),\n",
" ((3, 3),),\n",
" ((4, 1),),\n",
" ((1, 3),),\n",
" ((6, 1),),\n",
" ((2, 2), (10, 2)),\n",
" ((1, 0), (4, 0), (5, 3)), <------- what's going on?\n",
" ((5, 2),),\n",
" ((0, 1),),\n",
" ((4, 3),),\n",
" ((7, 0),),\n",
" ((2, 3),),\n",
" ((2, 1), (3, 1), (9, 1), (10, 1)),\n",
" ((1, 1),),\n",
" ((1, 2),),\n",
" ((5, 1),),\n",
" ((0, 0), (6, 0)),\n",
" ((4, 4),),\n",
" ((3, 2), (9, 2)),\n",
" ((2, 0), (3, 0), (9, 0), (10, 0), (6, 2)), <------- what's going on?\n",
" ((7, 1),),\n",
" ((8, 1),)]\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment