Created
August 6, 2019 07:56
-
-
Save sshojiro/251d60898844c99bba51d85162933cc3 to your computer and use it in GitHub Desktop.
Demo of extraction of linear fragments from a molecule, inspired by a mol2vec paper(doi:10.1021/acsomega.7b02045).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Extract of Linear Fragments\n", | |
"\n", | |
"Functions defined in this notebook is:\n", | |
"\n", | |
"- bond_indices_to_nodes(mol, bond_indices)\n", | |
" - input: **mol** Chem.Mol\n", | |
" - input: **bond_indices** possible pairs of bond indices, that form linear fragments\n", | |
" - generate pairs of bond indices that form linear fragments\n", | |
"- list_end_points_indices(G)\n", | |
" - input: **G** networkx.Graph\n", | |
"- sort_atoms_in_indices(ix_start,g)\n", | |
" - input: **ix_start** index of the endpoint\n", | |
" - input: **g** networkx.Graph\n", | |
"- generate_linear_fragments(mol, n_len=3)\n", | |
" - input: **mol** Chem.Mol\n", | |
" - input: **n_len** number of length of the fragment\n", | |
" - generate all possible linear fragments in SMILES-like format with explicit single bond" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit.Chem import rdmolops\n", | |
"from rdkit import Chem\n", | |
"from rdkit.Chem.Draw import IPythonConsole\n", | |
"from rdkit.Chem.Draw import DrawingOptions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"smi='CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'\n", | |
"mol=Chem.MolFromSmiles(smi)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Warning: unable to load font metrics from dir c:\\users\\shojiro_shibayama\\miniconda3\\envs\\cheminfo\\lib\\site-packages\\rdkit\\sping\\PIL\\pilfonts\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAIAAAD2HxkiAAAGqklEQVR4nO3d0XabVhCGUejq+7+yekHjqDJWiST455zZ+yrximOM9DGAEFpvt9sC5PyVXgDoToQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIusa5Hv9iPCDmfAp8SIee73R6/sq47X+xKhBAmQi5nDP6XCElY13+PCR0ZLsvf6QWgn68xaCQuy2ISQtx6a7MpWn/t+fT5lRlCl93Rdf29ubn/M8R12R1VHWV1iRDK6rI7utkOC01FSmkUoUNBauqyO6rAYfR7+b7XJPz6syDrut26vYjfJULVUVaX3VFGsg3DNkQIYSKkpE7DUIRUdbutPToUIXXdenTYKMIODycjahQhI+owDLtE6IqZcU3fYZcIGdqJG9CHvLeb31zbfIsrZoxB9n0vMHH/G5OQuh72QtdfPvYDHjILbannn4TG4KC+F/iBG5Rs/+fz7z3ybz5q/ggZ1MP5mBeq+8+3//pfjvzg7Zsv63DyCI3BzkZ56CePkM00t3s89wYlofcxzhyhMbiZ5naPpy/8/VXjjgn7eDj9cHYhCnxU4ATptBEOtMm/ZjknuNPcrDcoGeaZOqsLNhb3+Q20bXrfztnR338vtBKmnYQDueCsSZ/w7o3yWw8f4eib9vuFf+13eXIFydBrpo+xI5z74vp775R2/6q3LAs6N8KzH/sJ3uRycPq9uQK1V9mJEU7z8tSpjCnG3h2dg/aOmHg77q1MEFZ3Ej4/2Jt1o8iuicfgEo/Q6XUIR/h+aVp9X/E5U3zx3ndihM77jWJ7pDxGKedOQo/rEQJ4osPKcXaUZZnisodxiTCsw5b+ZU1Wjgg72h16hmGKCJNSW/ohemsyBpe5I6z/PNsVXOwh4pzPzBEWf0rtbukv2/yPuHJmNXOEPLfbYfE4pzR5hGWfUtkxWF+r9TB5hEvhDiuIDMPTP+ZlNPNHuNTrsP4YPG9Jdj/mZVPqMbpS3bcycY2fLhw96a7EnWP7SZcI61yjXHAM7v7oh9vAXbg47bTYHd3YBr+myMZrYo0iLKLaGCSuV4QXD8ODP6tygTYQF+hyTPjlsqdU213fn64O3/3H3vm9tP1AmLMf+G2AjD5Gjiz/7suMpy3RnNpNwsVdiT/K2ntfr2PCzQUfRTbHU3OO36K+SZ4uLzjvQzMv/vBdRtc3ws2pU2uakcipOh4TvsmtwfmsjhG+dtLvy/HMBMkRTfeXvDY1hCb78y1+ScbVocOOL1FwUNuLfi4mQvYVGUEd3vsiQggTITuKjMHN9MNQhAFzP6XOMHeHIgwo/pQqNQY7ECFjKL7leocIM8o+pYzB64kwpmyHZc26xkTIb/XHYPHFe40Ik2bdtPNHRBhWp8PiY/D7Wiqy3t4nwrw6HZY1cYGLCPlSeQx+X7bKS/unRFiCYdhZx3fW1/SRTfv/3njXu5kLEuGQfhqbz9Nyw9WaRFjI8TGln5mIsIoLxpQPfqhJhI0MvTs68QlSZ0fLcZq0G5Owiq99xXdervjI7VK5mAgLOdLJO/f/dkxYkwirOHiQ9mY82itosKPzuRlTPYkQwpwdhTARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhP0DDi2aYUEqz6EAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGB size=300x300 at 0x1BA6B5ABDD8>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def show_mol(m):\n", | |
" DrawingOptions.includeAtomNumbers=True\n", | |
" view= Chem.Draw.MolToImage(m)\n", | |
" DrawingOptions.includeAtomNumbers=False\n", | |
" return view\n", | |
"show_mol(mol)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Irrelavant method\n", | |
"\n", | |
"`rdmolops.FindAllSubgraphsOfLengthN` is not useful this time, because we want to extract linear fragment, not branched fragments." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{7: {10: {}}, 10: {7: {}, 12: {}}, 12: {10: {}, 14: {}}, 14: {12: {}}}\n", | |
"<dict_itemiterator object at 0x000001BA6C083C28>\n" | |
] | |
} | |
], | |
"source": [ | |
"import networkx as nx\n", | |
"\n", | |
"\n", | |
"n_len=3 # 3-lengths paths\n", | |
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n", | |
"\n", | |
"G = nx.Graph()\n", | |
"lst=[]\n", | |
"for bond_ix in lst_paths[16]:# 1, 16\n", | |
" bond=mol.GetBondWithIdx(bond_ix)\n", | |
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n", | |
"G.add_edges_from(lst)\n", | |
"print(G.adj)\n", | |
"print(G.adjacency())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[11, 14]\n", | |
"[11, 10, 12, 14] [14, 12, 10, 11]\n" | |
] | |
} | |
], | |
"source": [ | |
"def bond_indices_to_nodes(m, bond_indices):\n", | |
" G = nx.Graph()\n", | |
" lst=[]\n", | |
" for bond_ix in bond_indices:\n", | |
" bond= m.GetBondWithIdx(bond_ix)\n", | |
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n", | |
" G.add_edges_from(lst)\n", | |
" return G\n", | |
"def list_end_points_indices(g):\n", | |
" return [k for k,v in g.adjacency()if len(v)==1]\n", | |
"def sort_atoms_in_indices(ix_start,g):\n", | |
" indices_sorted = []\n", | |
" atom_indices = set(g.nodes)\n", | |
" atom_indices -= {ix_start}\n", | |
" next_ix = ix_start\n", | |
" indices_sorted += [next_ix]\n", | |
" while any(k in atom_indices for k in list(g.adj[next_ix].keys())):\n", | |
" if list(g.adj[next_ix].keys())[0] in atom_indices:\n", | |
" next_ix = list(g.adj[next_ix].keys())[0]\n", | |
" elif list(g.adj[next_ix].keys())[1] in atom_indices:\n", | |
" next_ix = list(g.adj[next_ix].keys())[1]\n", | |
" indices_sorted += [next_ix]\n", | |
" atom_indices -= {next_ix}\n", | |
" return indices_sorted\n", | |
"\n", | |
"gg = bond_indices_to_nodes(mol,list(lst_paths[-1]))\n", | |
"ind = list_end_points_indices(gg)\n", | |
"print(ind)\n", | |
"print(sort_atoms_in_indices(ind[0], gg),sort_atoms_in_indices(ind[1], gg))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"==========\n", | |
"[(0, 1), (1, 3), (3, 4)]\n", | |
"start and end [0, 4]\n", | |
"function output [0, 1, 3, 4]\n", | |
"==========\n", | |
"[(1, 3), (3, 4), (4, 5)]\n", | |
"start and end [1, 5]\n", | |
"function output [1, 3, 4, 5]\n", | |
"==========\n", | |
"[(1, 3), (3, 4), (9, 4)]\n", | |
"start and end [1, 9]\n", | |
"function output [1, 3, 4, 9]\n", | |
"==========\n", | |
"[(1, 2), (1, 3), (3, 4)]\n", | |
"start and end [2, 4]\n", | |
"function output [2, 1, 3, 4]\n", | |
"==========\n", | |
"[(3, 4), (4, 5), (5, 6)]\n", | |
"start and end [3, 6]\n", | |
"function output [3, 4, 5, 6]\n", | |
"==========\n", | |
"[(3, 4), (9, 4), (8, 9)]\n", | |
"start and end [3, 8]\n", | |
"function output [3, 4, 9, 8]\n", | |
"==========\n", | |
"[(4, 5), (5, 6), (6, 7)]\n", | |
"start and end [4, 7]\n", | |
"function output [4, 5, 6, 7]\n", | |
"==========\n", | |
"[(9, 4), (8, 9), (7, 8)]\n", | |
"start and end [4, 7]\n", | |
"function output [4, 9, 8, 7]\n", | |
"==========\n", | |
"[(4, 5), (9, 4), (8, 9)]\n", | |
"start and end [5, 8]\n", | |
"function output [5, 4, 9, 8]\n", | |
"==========\n", | |
"[(5, 6), (6, 7), (7, 8)]\n", | |
"start and end [5, 8]\n", | |
"function output [5, 6, 7, 8]\n", | |
"==========\n", | |
"[(5, 6), (6, 7), (7, 10)]\n", | |
"start and end [5, 10]\n", | |
"function output [5, 6, 7, 10]\n", | |
"==========\n", | |
"[(5, 6), (4, 5), (9, 4)]\n", | |
"start and end [6, 9]\n", | |
"function output [6, 5, 4, 9]\n", | |
"==========\n", | |
"[(6, 7), (7, 8), (8, 9)]\n", | |
"start and end [6, 9]\n", | |
"function output [6, 7, 8, 9]\n", | |
"==========\n", | |
"[(6, 7), (7, 10), (10, 11)]\n", | |
"start and end [6, 11]\n", | |
"function output [6, 7, 10, 11]\n", | |
"==========\n", | |
"[(6, 7), (7, 10), (10, 12)]\n", | |
"start and end [6, 12]\n", | |
"function output [6, 7, 10, 12]\n", | |
"==========\n", | |
"[(7, 10), (10, 12), (12, 13)]\n", | |
"start and end [7, 13]\n", | |
"function output [7, 10, 12, 13]\n", | |
"==========\n", | |
"[(7, 10), (10, 12), (12, 14)]\n", | |
"start and end [7, 14]\n", | |
"function output [7, 10, 12, 14]\n", | |
"==========\n", | |
"[(7, 8), (7, 10), (10, 11)]\n", | |
"start and end [8, 11]\n", | |
"function output [8, 7, 10, 11]\n", | |
"==========\n", | |
"[(7, 8), (7, 10), (10, 12)]\n", | |
"start and end [8, 12]\n", | |
"function output [8, 7, 10, 12]\n", | |
"==========\n", | |
"[(8, 9), (7, 8), (7, 10)]\n", | |
"start and end [9, 10]\n", | |
"function output [9, 8, 7, 10]\n", | |
"==========\n", | |
"[(10, 11), (10, 12), (12, 13)]\n", | |
"start and end [11, 13]\n", | |
"function output [11, 10, 12, 13]\n", | |
"==========\n", | |
"[(10, 11), (10, 12), (12, 14)]\n", | |
"start and end [11, 14]\n", | |
"function output [11, 10, 12, 14]\n" | |
] | |
} | |
], | |
"source": [ | |
"for p in lst_paths:\n", | |
" print('='*10)\n", | |
" lst=[]\n", | |
" for bond_ix in p:\n", | |
" bond = mol.GetBondWithIdx(bond_ix)\n", | |
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n", | |
" print(lst)\n", | |
" gg = bond_indices_to_nodes(mol, list(p))\n", | |
" ind = list_end_points_indices(gg)\n", | |
" print('start and end', ind)\n", | |
" print('function output',sort_atoms_in_indices(ind[0],gg))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def generate_linear_fragments(mol, n_len=3):\n", | |
" lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n", | |
" smiles = []\n", | |
" lst_indices = []\n", | |
" for p in lst_paths:\n", | |
" lst=[]\n", | |
" for bond_ix in p:\n", | |
" bond = mol.GetBondWithIdx(bond_ix)\n", | |
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n", | |
" gg = bond_indices_to_nodes(mol, list(p))\n", | |
" ind = list_end_points_indices(gg)\n", | |
" atom_indices = list(sort_atoms_in_indices(ind[0],gg))\n", | |
" lst_indices += [atom_indices]\n", | |
" smi=''\n", | |
" for ix in range(n_len):\n", | |
" smi += mol.GetAtomWithIdx(atom_indices[ix]).GetSymbol()\n", | |
" bond = mol.GetBondBetweenAtoms(atom_indices[ix],atom_indices[ix+1]) \n", | |
" if bond.GetBondType() is Chem.BondType.SINGLE:\n", | |
" smi += '-'# explicitly shown, compared to aromatic bond.\n", | |
" elif bond.GetBondType() is Chem.BondType.DOUBLE:\n", | |
" smi += '='\n", | |
" elif bond.GetBondType() is Chem.BondType.TRIPLE:\n", | |
" smi += '#'\n", | |
" elif bond.GetBondType() is Chem.BondType.QUADRUPLE:\n", | |
" smi += '$'\n", | |
" else:\n", | |
" smi += mol.GetAtomWithIdx(atom_indices[ix+1]).GetSymbol()\n", | |
" smiles += [smi]\n", | |
" return smiles, lst_indices" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[0, 1, 3, 4, 5, 6],\n", | |
" [0, 1, 3, 4, 9, 8],\n", | |
" [1, 3, 4, 5, 6, 7],\n", | |
" [1, 3, 4, 9, 8, 7],\n", | |
" [2, 1, 3, 4, 5, 6],\n", | |
" [2, 1, 3, 4, 9, 8],\n", | |
" [3, 4, 5, 6, 7, 8],\n", | |
" [3, 4, 5, 6, 7, 10],\n", | |
" [3, 4, 9, 8, 7, 6],\n", | |
" [3, 4, 9, 8, 7, 10],\n", | |
" [4, 5, 6, 7, 8, 9],\n", | |
" [4, 5, 6, 7, 10, 11],\n", | |
" [4, 5, 6, 7, 10, 12],\n", | |
" [4, 9, 8, 7, 6, 5],\n", | |
" [4, 9, 8, 7, 10, 11],\n", | |
" [4, 9, 8, 7, 10, 12],\n", | |
" [5, 4, 9, 8, 7, 6],\n", | |
" [5, 4, 9, 8, 7, 10],\n", | |
" [5, 6, 7, 10, 12, 13],\n", | |
" [5, 6, 7, 10, 12, 14],\n", | |
" [6, 5, 4, 9, 8, 7],\n", | |
" [7, 6, 5, 4, 9, 8],\n", | |
" [8, 7, 6, 5, 4, 9],\n", | |
" [9, 4, 5, 6, 7, 10],\n", | |
" [9, 8, 7, 10, 12, 13],\n", | |
" [9, 8, 7, 10, 12, 14]]\n" | |
] | |
} | |
], | |
"source": [ | |
"from pprint import pprint\n", | |
"for l in range(1, 6):\n", | |
" smis, atom_ixs = generate_linear_fragments(mol, l)\n", | |
"else:\n", | |
" pprint(atom_ixs)\n", | |
"# print('length items:', l,len(smis))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['C-C-C-CCC', 'C-C-C-CCC', 'C-C-CCCC', 'C-C-CCCC', 'C-C-C-CCC']" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"smis, atom_ixs = generate_linear_fragments(mol, l)\n", | |
"smis[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCC=O\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCO\n", | |
"==========\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"AROMATIC\n", | |
"AROMATIC\n", | |
"SINGLE\n", | |
"CCCC\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCC=O\n", | |
"==========\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"SINGLE\n", | |
"CCCO\n" | |
] | |
} | |
], | |
"source": [ | |
"n_len=3 # 3-lengths paths\n", | |
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n", | |
"for p in lst_paths:\n", | |
" print('='*10)\n", | |
" lst=[]\n", | |
" for bond_ix in p:\n", | |
" bond = mol.GetBondWithIdx(bond_ix)\n", | |
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n", | |
" gg = bond_indices_to_nodes(mol, list(p))\n", | |
" ind = list_end_points_indices(gg)\n", | |
" atom_indices = list(sort_atoms_in_indices(ind[0],gg))\n", | |
" \n", | |
" smi=''\n", | |
" for ix in range(n_len):\n", | |
" smi += mol.GetAtomWithIdx(atom_indices[ix]).GetSymbol()\n", | |
" bond = mol.GetBondBetweenAtoms(atom_indices[ix],atom_indices[ix+1]) \n", | |
" if bond.GetBondType() is Chem.BondType.DOUBLE:\n", | |
" smi += '='\n", | |
" elif bond.GetBondType() is Chem.BondType.TRIPLE:\n", | |
" smi += '#'\n", | |
" else:\n", | |
" print(bond.GetBondType())\n", | |
" else:\n", | |
" smi += mol.GetAtomWithIdx(atom_indices[ix+1]).GetSymbol()\n", | |
" print(smi)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Proof that GetShortestPath is not the best in this case\n", | |
"\n", | |
"The code below using `FindAllPathsOfLengthN` is also not the best, because atom indices in `tup_atom_inter_ix` are randomly aligned." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 5\n", | |
"0 1 3 4 5\n", | |
"0 1 3 4 5\n", | |
"0 9\n", | |
"0 1 3 4 9\n", | |
"0 1 3 4 9\n", | |
"1 6\n", | |
"1 3 4 5 6\n", | |
"1 3 4 5 6\n", | |
"1 8\n", | |
"1 9 3 4 8\n", | |
"1 3 4 9 8\n", | |
"2 5\n", | |
"2 1 3 4 5\n", | |
"2 1 3 4 5\n", | |
"2 9\n", | |
"2 1 3 4 9\n", | |
"2 1 3 4 9\n", | |
"3 7\n", | |
"3 4 5 6 7\n", | |
"3 4 5 6 7\n", | |
"3 7\n", | |
"3 8 9 4 7\n", | |
"3 4 5 6 7\n", | |
"4 8\n", | |
"4 5 6 7 8\n", | |
"4 9 8\n", | |
"4 10\n", | |
"4 5 6 7 10\n", | |
"4 5 6 7 10\n", | |
"4 6\n", | |
"4 8 9 7 6\n", | |
"4 5 6\n", | |
"4 10\n", | |
"4 8 9 7 10\n", | |
"4 5 6 7 10\n", | |
"5 7\n", | |
"5 8 9 4 7\n", | |
"5 6 7\n", | |
"5 9\n", | |
"5 8 6 7 9\n", | |
"5 4 9\n", | |
"5 11\n", | |
"5 10 6 7 11\n", | |
"5 6 7 10 11\n", | |
"5 12\n", | |
"5 10 6 7 12\n", | |
"5 6 7 10 12\n", | |
"6 8\n", | |
"6 9 4 5 8\n", | |
"6 7 8\n", | |
"6 13\n", | |
"6 10 12 7 13\n", | |
"6 7 10 12 13\n", | |
"6 14\n", | |
"6 10 12 7 14\n", | |
"6 7 10 12 14\n", | |
"7 9\n", | |
"7 4 5 6 9\n", | |
"7 8 9\n", | |
"8 13\n", | |
"8 10 12 7 13\n", | |
"8 7 10 12 13\n", | |
"8 14\n", | |
"8 10 12 7 14\n", | |
"8 7 10 12 14\n", | |
"9 11\n", | |
"9 8 10 7 11\n", | |
"9 8 7 10 11\n", | |
"9 12\n", | |
"9 8 10 7 12\n", | |
"9 8 7 10 12\n" | |
] | |
} | |
], | |
"source": [ | |
"n_len=4\n", | |
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,\n", | |
" useBonds=True,\n", | |
" useHs=True)\n", | |
"for pathset in lst_paths:\n", | |
" tup_atom_inter_ix = {mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx()\n", | |
" for bond_ix in pathset[1:-1]} | {\n", | |
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()\n", | |
" for bond_ix in pathset[1:-1]}\n", | |
" bond_ix = pathset[0]\n", | |
" atom_ix_start={mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx(),\n", | |
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()}-tup_atom_inter_ix\n", | |
"\n", | |
" bond_ix = pathset[-1]\n", | |
" atom_ix_end={mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx(),\n", | |
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()}-tup_atom_inter_ix\n", | |
" print(list(atom_ix_start)[0],list(atom_ix_end)[0])\n", | |
" print(list(atom_ix_start)[0],*list(tup_atom_inter_ix),list(atom_ix_end)[0])\n", | |
" print(*rdmolops.GetShortestPath(mol,\n", | |
" list(atom_ix_start)[0],list(atom_ix_end)[0]))\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment