Skip to content

Instantly share code, notes, and snippets.

@marta-sd
Created January 17, 2018 18:04
Show Gist options
  • Save marta-sd/ecf88ee96c41022f27d9f5458a142811 to your computer and use it in GitHub Desktop.
Save marta-sd/ecf88ee96c41022f27d9f5458a142811 to your computer and use it in GitHub Desktop.
Merge multiple molecules in DeCAF
from rdkit.Chem import MolFromSmiles
from decaf.toolkits.rd import phar_from_mol
from decaf.utils import similarity, combine_pharmacophores, draw, filter_nodes
from scipy.cluster.hierarchy import average as avg_clustering
from scipy.spatial.distance import squareform
# Input ligands, one SMILES string per molecule.
smiles = [
    'c1cc(cc(c1)N)c2cc(cc(c2O)c3[nH]c4ccc(cc4n3)C(=N)N)Cl',
    'c1cc(cc(c1)N(=O)=O)c2cc(cc(c2O)c3cc4cc(ccc4[nH]3)C(=N)N)C(CC(=O)O)C(=O)O',
    'c1cc(c(cc1CNC(=O)N)c2cc(cc(c2O)c3[nH]c4ccc(cc4n3)C(=N)N)C(CC(=O)O)C(=O)O)O',
    'c1ccc(cc1)C[C@@H](C(=O)NCc2cccc(c2)c3cc(cc(c3O)c4cc-5c([nH]ccc5n4)N)C(=O)O)O',
    'CC[C@@H](C)NC(=O)c1cc(cc(c1)N)c2cnc(c(=O)n2CC(=O)NCc3ccc(cc3)C(=N)N)NC(C)C',
]
# Parse every SMILES with RDKit and hand the resulting molecule to DeCAF's
# phar_from_mol; `phars` keeps the same order as `smiles`.
phars = [phar_from_mol(mol) for mol in map(MolFromSmiles, smiles)]
# Distance matrix in vector (condensed) form: one entry per unordered pair of
# models, ordered (0,1), (0,2), ..., (1,2), ...  Each distance is 1 minus the
# first element of the value returned by similarity().
distance = [
    1 - similarity(left, right)[0]
    for offset, left in enumerate(phars, start=1)
    for right in phars[offset:]
]
# Show the same distances as a full square matrix for easier reading.
print('Distance matrix:')
print(squareform(distance), end='\n\n')
# Average-linkage hierarchical clustering on the condensed distances.
clustering = avg_clustering(distance)
print('Clustering linkage matrix:')
print(clustering, end='\n\n')
# Merge models based on clustering.
# `clusters` starts as a copy of the single-molecule models. Each row of the
# linkage matrix names the two clusters (i, j) that were joined at that step;
# the merged model is appended, so the cluster created by row k ends up at
# position len(phars) + k -- the index later rows use to refer to it.
clusters = phars[:]
for i, j, dist, mols_in_cluster in clustering:
    # The linkage matrix is a float array, so the cluster indices have to be
    # converted back to int before they can be used to index the list.
    clusters.append(combine_pharmacophores(clusters[int(i)], clusters[int(j)]))
# The last appended model is the result of the final merge step. Filter the
# least frequent nodes out of it.
# NOTE(review): freq_range=(0.3, 1) is passed straight to filter_nodes;
# confirm in the DeCAF docs how the bounds are interpreted.
merged_molecules = filter_nodes(clusters[-1], freq_range=(0.3, 1))
# Draw and inspect the model.
draw(merged_molecules)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment