Skip to content

Instantly share code, notes, and snippets.

@sshojiro
Created August 6, 2019 07:56
Show Gist options
  • Save sshojiro/251d60898844c99bba51d85162933cc3 to your computer and use it in GitHub Desktop.
Save sshojiro/251d60898844c99bba51d85162933cc3 to your computer and use it in GitHub Desktop.
Demo of extraction of linear fragments from a molecule, inspired by a mol2vec paper(doi:10.1021/acsomega.7b02045).
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extract of Linear Fragments\n",
"\n",
"Functions defined in this notebook is:\n",
"\n",
"- bond_indices_to_nodes(mol, bond_indices)\n",
" - input: **mol** Chem.Mol\n",
" - input: **bond_indices** possible pairs of bond indices, that form linear fragments\n",
" - generate pairs of bond indices that form linear fragments\n",
"- list_end_points_indices(G)\n",
" - input: **G** networkx.Graph\n",
"- sort_atoms_in_indices(ix_start,g)\n",
" - input: **ix_start** index of the endpoint\n",
" - input: **g** networkx.Graph\n",
"- generate_linear_fragments(mol, n_len=3)\n",
" - input: **mol** Chem.Mol\n",
" - input: **n_len** number of length of the fragment\n",
" - generate all possible linear fragments in SMILES-like format with explicit single bond"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit.Chem import rdmolops\n",
"from rdkit import Chem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"from rdkit.Chem.Draw import DrawingOptions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"smi='CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'\n",
"mol=Chem.MolFromSmiles(smi)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: unable to load font metrics from dir c:\\users\\shojiro_shibayama\\miniconda3\\envs\\cheminfo\\lib\\site-packages\\rdkit\\sping\\PIL\\pilfonts\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAIAAAD2HxkiAAAGqklEQVR4nO3d0XabVhCGUejq+7+yekHjqDJWiST455zZ+yrximOM9DGAEFpvt9sC5PyVXgDoToQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIusa5Hv9iPCDmfAp8SIee73R6/sq47X+xKhBAmQi5nDP6XCElY13+PCR0ZLsvf6QWgn68xaCQuy2ISQtx6a7MpWn/t+fT5lRlCl93Rdf29ubn/M8R12R1VHWV1iRDK6rI7utkOC01FSmkUoUNBauqyO6rAYfR7+b7XJPz6syDrut26vYjfJULVUVaX3VFGsg3DNkQIYSKkpE7DUIRUdbutPToUIXXdenTYKMIODycjahQhI+owDLtE6IqZcU3fYZcIGdqJG9CHvLeb31zbfIsrZoxB9n0vMHH/G5OQuh72QtdfPvYDHjILbannn4TG4KC+F/iBG5Rs/+fz7z3ybz5q/ggZ1MP5mBeq+8+3//pfjvzg7Zsv63DyCI3BzkZ56CePkM00t3s89wYlofcxzhyhMbiZ5naPpy/8/VXjjgn7eDj9cHYhCnxU4ATptBEOtMm/ZjknuNPcrDcoGeaZOqsLNhb3+Q20bXrfztnR338vtBKmnYQDueCsSZ/w7o3yWw8f4eib9vuFf+13eXIFydBrpo+xI5z74vp775R2/6q3LAs6N8KzH/sJ3uRycPq9uQK1V9mJEU7z8tSpjCnG3h2dg/aOmHg77q1MEFZ3Ej4/2Jt1o8iuicfgEo/Q6XUIR/h+aVp9X/E5U3zx3ndihM77jWJ7pDxGKedOQo/rEQJ4osPKcXaUZZnisodxiTCsw5b+ZU1Wjgg72h16hmGKCJNSW/ohemsyBpe5I6z/PNsVXOwh4pzPzBEWf0rtbukv2/yPuHJmNXOEPLfbYfE4pzR5hGWfUtkxWF+r9TB5hEvhDiuIDMPTP+ZlNPNHuNTrsP4YPG9Jdj/mZVPqMbpS3bcycY2fLhw96a7EnWP7SZcI61yjXHAM7v7oh9vAXbg47bTYHd3YBr+myMZrYo0iLKLaGCSuV4QXD8ODP6tygTYQF+hyTPjlsqdU213fn64O3/3H3vm9tP1AmLMf+G2AjD5Gjiz/7suMpy3RnNpNwsVdiT/K2ntfr2PCzQUfRTbHU3OO36K+SZ4uLzjvQzMv/vBdRtc3ws2pU2uakcipOh4TvsmtwfmsjhG+dtLvy/HMBMkRTfeXvDY1hCb78y1+ScbVocOOL1FwUNuLfi4mQvYVGUEd3vsiQggTITuKjMHN9MNQhAFzP6XOMHeHIgwo/pQqNQY7ECFjKL7leocIM8o+pYzB64kwpmyHZc26xkTIb/XHYPHFe40Ik2bdtPNHRBhWp8PiY/D7Wiqy3t4nwrw6HZY1cYGLCPlSeQx+X7bKS/unRFiCYdhZx3fW1/SRTfv/3njXu5kLEuGQfhqbz9Nyw9WaRFjI8TGln5mIsIoLxpQPfqhJhI0MvTs68QlSZ0fLcZq0G5Owiq99xXdervjI7VK5mAgLOdLJO/f/dkxYkwirOHiQ9mY82itosKPzuRlTPYkQwpwdhTARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhIkQwkQIYSKEMBFCmAghTIQQJkIIEyGEiRDCRAhhIoQwEUKYCCFMhBAmQggTIYSJEMJECGEihDARQpgIIUyEECZCCBMhhP0DDi2aYUEqz6EAAAAASUVORK5CYII=\n",
"text/plain": [
"<PIL.Image.Image image mode=RGB size=300x300 at 0x1BA6B5ABDD8>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def show_mol(m):\n",
" DrawingOptions.includeAtomNumbers=True\n",
" view= Chem.Draw.MolToImage(m)\n",
" DrawingOptions.includeAtomNumbers=False\n",
" return view\n",
"show_mol(mol)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Irrelavant method\n",
"\n",
"`rdmolops.FindAllSubgraphsOfLengthN` is not useful this time, because we want to extract linear fragment, not branched fragments."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{7: {10: {}}, 10: {7: {}, 12: {}}, 12: {10: {}, 14: {}}, 14: {12: {}}}\n",
"<dict_itemiterator object at 0x000001BA6C083C28>\n"
]
}
],
"source": [
"import networkx as nx\n",
"\n",
"\n",
"n_len=3 # 3-lengths paths\n",
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n",
"\n",
"G = nx.Graph()\n",
"lst=[]\n",
"for bond_ix in lst_paths[16]:# 1, 16\n",
" bond=mol.GetBondWithIdx(bond_ix)\n",
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n",
"G.add_edges_from(lst)\n",
"print(G.adj)\n",
"print(G.adjacency())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[11, 14]\n",
"[11, 10, 12, 14] [14, 12, 10, 11]\n"
]
}
],
"source": [
"def bond_indices_to_nodes(m, bond_indices):\n",
" G = nx.Graph()\n",
" lst=[]\n",
" for bond_ix in bond_indices:\n",
" bond= m.GetBondWithIdx(bond_ix)\n",
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n",
" G.add_edges_from(lst)\n",
" return G\n",
"def list_end_points_indices(g):\n",
" return [k for k,v in g.adjacency()if len(v)==1]\n",
"def sort_atoms_in_indices(ix_start,g):\n",
" indices_sorted = []\n",
" atom_indices = set(g.nodes)\n",
" atom_indices -= {ix_start}\n",
" next_ix = ix_start\n",
" indices_sorted += [next_ix]\n",
" while any(k in atom_indices for k in list(g.adj[next_ix].keys())):\n",
" if list(g.adj[next_ix].keys())[0] in atom_indices:\n",
" next_ix = list(g.adj[next_ix].keys())[0]\n",
" elif list(g.adj[next_ix].keys())[1] in atom_indices:\n",
" next_ix = list(g.adj[next_ix].keys())[1]\n",
" indices_sorted += [next_ix]\n",
" atom_indices -= {next_ix}\n",
" return indices_sorted\n",
"\n",
"gg = bond_indices_to_nodes(mol,list(lst_paths[-1]))\n",
"ind = list_end_points_indices(gg)\n",
"print(ind)\n",
"print(sort_atoms_in_indices(ind[0], gg),sort_atoms_in_indices(ind[1], gg))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==========\n",
"[(0, 1), (1, 3), (3, 4)]\n",
"start and end [0, 4]\n",
"function output [0, 1, 3, 4]\n",
"==========\n",
"[(1, 3), (3, 4), (4, 5)]\n",
"start and end [1, 5]\n",
"function output [1, 3, 4, 5]\n",
"==========\n",
"[(1, 3), (3, 4), (9, 4)]\n",
"start and end [1, 9]\n",
"function output [1, 3, 4, 9]\n",
"==========\n",
"[(1, 2), (1, 3), (3, 4)]\n",
"start and end [2, 4]\n",
"function output [2, 1, 3, 4]\n",
"==========\n",
"[(3, 4), (4, 5), (5, 6)]\n",
"start and end [3, 6]\n",
"function output [3, 4, 5, 6]\n",
"==========\n",
"[(3, 4), (9, 4), (8, 9)]\n",
"start and end [3, 8]\n",
"function output [3, 4, 9, 8]\n",
"==========\n",
"[(4, 5), (5, 6), (6, 7)]\n",
"start and end [4, 7]\n",
"function output [4, 5, 6, 7]\n",
"==========\n",
"[(9, 4), (8, 9), (7, 8)]\n",
"start and end [4, 7]\n",
"function output [4, 9, 8, 7]\n",
"==========\n",
"[(4, 5), (9, 4), (8, 9)]\n",
"start and end [5, 8]\n",
"function output [5, 4, 9, 8]\n",
"==========\n",
"[(5, 6), (6, 7), (7, 8)]\n",
"start and end [5, 8]\n",
"function output [5, 6, 7, 8]\n",
"==========\n",
"[(5, 6), (6, 7), (7, 10)]\n",
"start and end [5, 10]\n",
"function output [5, 6, 7, 10]\n",
"==========\n",
"[(5, 6), (4, 5), (9, 4)]\n",
"start and end [6, 9]\n",
"function output [6, 5, 4, 9]\n",
"==========\n",
"[(6, 7), (7, 8), (8, 9)]\n",
"start and end [6, 9]\n",
"function output [6, 7, 8, 9]\n",
"==========\n",
"[(6, 7), (7, 10), (10, 11)]\n",
"start and end [6, 11]\n",
"function output [6, 7, 10, 11]\n",
"==========\n",
"[(6, 7), (7, 10), (10, 12)]\n",
"start and end [6, 12]\n",
"function output [6, 7, 10, 12]\n",
"==========\n",
"[(7, 10), (10, 12), (12, 13)]\n",
"start and end [7, 13]\n",
"function output [7, 10, 12, 13]\n",
"==========\n",
"[(7, 10), (10, 12), (12, 14)]\n",
"start and end [7, 14]\n",
"function output [7, 10, 12, 14]\n",
"==========\n",
"[(7, 8), (7, 10), (10, 11)]\n",
"start and end [8, 11]\n",
"function output [8, 7, 10, 11]\n",
"==========\n",
"[(7, 8), (7, 10), (10, 12)]\n",
"start and end [8, 12]\n",
"function output [8, 7, 10, 12]\n",
"==========\n",
"[(8, 9), (7, 8), (7, 10)]\n",
"start and end [9, 10]\n",
"function output [9, 8, 7, 10]\n",
"==========\n",
"[(10, 11), (10, 12), (12, 13)]\n",
"start and end [11, 13]\n",
"function output [11, 10, 12, 13]\n",
"==========\n",
"[(10, 11), (10, 12), (12, 14)]\n",
"start and end [11, 14]\n",
"function output [11, 10, 12, 14]\n"
]
}
],
"source": [
"for p in lst_paths:\n",
" print('='*10)\n",
" lst=[]\n",
" for bond_ix in p:\n",
" bond = mol.GetBondWithIdx(bond_ix)\n",
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n",
" print(lst)\n",
" gg = bond_indices_to_nodes(mol, list(p))\n",
" ind = list_end_points_indices(gg)\n",
" print('start and end', ind)\n",
" print('function output',sort_atoms_in_indices(ind[0],gg))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def generate_linear_fragments(mol, n_len=3):\n",
" lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n",
" smiles = []\n",
" lst_indices = []\n",
" for p in lst_paths:\n",
" lst=[]\n",
" for bond_ix in p:\n",
" bond = mol.GetBondWithIdx(bond_ix)\n",
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n",
" gg = bond_indices_to_nodes(mol, list(p))\n",
" ind = list_end_points_indices(gg)\n",
" atom_indices = list(sort_atoms_in_indices(ind[0],gg))\n",
" lst_indices += [atom_indices]\n",
" smi=''\n",
" for ix in range(n_len):\n",
" smi += mol.GetAtomWithIdx(atom_indices[ix]).GetSymbol()\n",
" bond = mol.GetBondBetweenAtoms(atom_indices[ix],atom_indices[ix+1]) \n",
" if bond.GetBondType() is Chem.BondType.SINGLE:\n",
" smi += '-'# explicitly shown, compared to aromatic bond.\n",
" elif bond.GetBondType() is Chem.BondType.DOUBLE:\n",
" smi += '='\n",
" elif bond.GetBondType() is Chem.BondType.TRIPLE:\n",
" smi += '#'\n",
" elif bond.GetBondType() is Chem.BondType.QUADRUPLE:\n",
" smi += '$'\n",
" else:\n",
" smi += mol.GetAtomWithIdx(atom_indices[ix+1]).GetSymbol()\n",
" smiles += [smi]\n",
" return smiles, lst_indices"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0, 1, 3, 4, 5, 6],\n",
" [0, 1, 3, 4, 9, 8],\n",
" [1, 3, 4, 5, 6, 7],\n",
" [1, 3, 4, 9, 8, 7],\n",
" [2, 1, 3, 4, 5, 6],\n",
" [2, 1, 3, 4, 9, 8],\n",
" [3, 4, 5, 6, 7, 8],\n",
" [3, 4, 5, 6, 7, 10],\n",
" [3, 4, 9, 8, 7, 6],\n",
" [3, 4, 9, 8, 7, 10],\n",
" [4, 5, 6, 7, 8, 9],\n",
" [4, 5, 6, 7, 10, 11],\n",
" [4, 5, 6, 7, 10, 12],\n",
" [4, 9, 8, 7, 6, 5],\n",
" [4, 9, 8, 7, 10, 11],\n",
" [4, 9, 8, 7, 10, 12],\n",
" [5, 4, 9, 8, 7, 6],\n",
" [5, 4, 9, 8, 7, 10],\n",
" [5, 6, 7, 10, 12, 13],\n",
" [5, 6, 7, 10, 12, 14],\n",
" [6, 5, 4, 9, 8, 7],\n",
" [7, 6, 5, 4, 9, 8],\n",
" [8, 7, 6, 5, 4, 9],\n",
" [9, 4, 5, 6, 7, 10],\n",
" [9, 8, 7, 10, 12, 13],\n",
" [9, 8, 7, 10, 12, 14]]\n"
]
}
],
"source": [
"from pprint import pprint\n",
"for l in range(1, 6):\n",
" smis, atom_ixs = generate_linear_fragments(mol, l)\n",
"else:\n",
" pprint(atom_ixs)\n",
"# print('length items:', l,len(smis))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['C-C-C-CCC', 'C-C-C-CCC', 'C-C-CCCC', 'C-C-CCCC', 'C-C-C-CCC']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"smis, atom_ixs = generate_linear_fragments(mol, l)\n",
"smis[:5]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==========\n",
"SINGLE\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"AROMATIC\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"CCC=O\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"SINGLE\n",
"CCCO\n",
"==========\n",
"AROMATIC\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"SINGLE\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"AROMATIC\n",
"AROMATIC\n",
"SINGLE\n",
"CCCC\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"CCC=O\n",
"==========\n",
"SINGLE\n",
"SINGLE\n",
"SINGLE\n",
"CCCO\n"
]
}
],
"source": [
"n_len=3 # 3-lengths paths\n",
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,useBonds=True,useHs=True)\n",
"for p in lst_paths:\n",
" print('='*10)\n",
" lst=[]\n",
" for bond_ix in p:\n",
" bond = mol.GetBondWithIdx(bond_ix)\n",
" lst+=[(bond.GetBeginAtomIdx(),bond.GetEndAtomIdx())]\n",
" gg = bond_indices_to_nodes(mol, list(p))\n",
" ind = list_end_points_indices(gg)\n",
" atom_indices = list(sort_atoms_in_indices(ind[0],gg))\n",
" \n",
" smi=''\n",
" for ix in range(n_len):\n",
" smi += mol.GetAtomWithIdx(atom_indices[ix]).GetSymbol()\n",
" bond = mol.GetBondBetweenAtoms(atom_indices[ix],atom_indices[ix+1]) \n",
" if bond.GetBondType() is Chem.BondType.DOUBLE:\n",
" smi += '='\n",
" elif bond.GetBondType() is Chem.BondType.TRIPLE:\n",
" smi += '#'\n",
" else:\n",
" print(bond.GetBondType())\n",
" else:\n",
" smi += mol.GetAtomWithIdx(atom_indices[ix+1]).GetSymbol()\n",
" print(smi)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Proof that GetShortestPath is not the best in this case\n",
"\n",
"The code below using `FindAllPathsOfLengthN` is also not the best, because atom indices in `tup_atom_inter_ix` are randomly aligned."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 5\n",
"0 1 3 4 5\n",
"0 1 3 4 5\n",
"0 9\n",
"0 1 3 4 9\n",
"0 1 3 4 9\n",
"1 6\n",
"1 3 4 5 6\n",
"1 3 4 5 6\n",
"1 8\n",
"1 9 3 4 8\n",
"1 3 4 9 8\n",
"2 5\n",
"2 1 3 4 5\n",
"2 1 3 4 5\n",
"2 9\n",
"2 1 3 4 9\n",
"2 1 3 4 9\n",
"3 7\n",
"3 4 5 6 7\n",
"3 4 5 6 7\n",
"3 7\n",
"3 8 9 4 7\n",
"3 4 5 6 7\n",
"4 8\n",
"4 5 6 7 8\n",
"4 9 8\n",
"4 10\n",
"4 5 6 7 10\n",
"4 5 6 7 10\n",
"4 6\n",
"4 8 9 7 6\n",
"4 5 6\n",
"4 10\n",
"4 8 9 7 10\n",
"4 5 6 7 10\n",
"5 7\n",
"5 8 9 4 7\n",
"5 6 7\n",
"5 9\n",
"5 8 6 7 9\n",
"5 4 9\n",
"5 11\n",
"5 10 6 7 11\n",
"5 6 7 10 11\n",
"5 12\n",
"5 10 6 7 12\n",
"5 6 7 10 12\n",
"6 8\n",
"6 9 4 5 8\n",
"6 7 8\n",
"6 13\n",
"6 10 12 7 13\n",
"6 7 10 12 13\n",
"6 14\n",
"6 10 12 7 14\n",
"6 7 10 12 14\n",
"7 9\n",
"7 4 5 6 9\n",
"7 8 9\n",
"8 13\n",
"8 10 12 7 13\n",
"8 7 10 12 13\n",
"8 14\n",
"8 10 12 7 14\n",
"8 7 10 12 14\n",
"9 11\n",
"9 8 10 7 11\n",
"9 8 7 10 11\n",
"9 12\n",
"9 8 10 7 12\n",
"9 8 7 10 12\n"
]
}
],
"source": [
"n_len=4\n",
"lst_paths = rdmolops.FindAllPathsOfLengthN(mol,n_len,\n",
" useBonds=True,\n",
" useHs=True)\n",
"for pathset in lst_paths:\n",
" tup_atom_inter_ix = {mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx()\n",
" for bond_ix in pathset[1:-1]} | {\n",
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()\n",
" for bond_ix in pathset[1:-1]}\n",
" bond_ix = pathset[0]\n",
" atom_ix_start={mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx(),\n",
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()}-tup_atom_inter_ix\n",
"\n",
" bond_ix = pathset[-1]\n",
" atom_ix_end={mol.GetBondWithIdx(bond_ix).GetBeginAtomIdx(),\n",
" mol.GetBondWithIdx(bond_ix).GetEndAtomIdx()}-tup_atom_inter_ix\n",
" print(list(atom_ix_start)[0],list(atom_ix_end)[0])\n",
" print(list(atom_ix_start)[0],*list(tup_atom_inter_ix),list(atom_ix_end)[0])\n",
" print(*rdmolops.GetShortestPath(mol,\n",
" list(atom_ix_start)[0],list(atom_ix_end)[0]))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment