Last active
October 28, 2019 13:08
-
-
Save goraj/410cd48c0f72cf7c1ab815576a8b008d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Fri Oct 25 23:16:39 2019 | |
rdkit.Chem.rdFingerprintGenerator.GetRDKitFPGenerator([(int)minPath=1[, (int)maxPath=7[, (bool)useHs=True[, (bool)branchedPaths=True[, (bool)useBondOrder=True[, (bool)countSimulation=False[, (AtomPairsParameters)countBounds=None[, (int)fpSize=2048[, (int)numBitsPerFeature=2[, (AtomPairsParameters)atomInvariantsGenerator=None]]]]]]]]]]) → FingerprintGenerator64 : | |
Get an RDKit fingerprint generator | |
ARGUMENTS: | |
minPath: the minimum path length (in bonds) to be included | |
maxPath: the maximum path length (in bonds) to be included | |
useHs: toggles inclusion of Hs in paths (if the molecule has explicit Hs) | |
branchedPaths: toggles generation of branched subgraphs, not just linear paths | |
useBondOrder: toggles inclusion of bond orders in the path hashes | |
countSimulation: if set, use count simulation while generating the fingerprint
countBounds: boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot | |
fpSize: size of the generated fingerprint, does not affect the sparse versions | |
numBitsPerFeature: the number of bits set per path/subgraph found | |
atomInvariantsGenerator: atom invariants to be used during fingerprint generation | |
RETURNS: FingerprintGenerator | |
@author: goraj | |
""" | |
from scipy.sparse import dok_matrix | |
from rdkit import Chem | |
from rdkit.Chem.rdFingerprintGenerator import GetRDKitFPGenerator | |
# Example input: the same SMILES string parsed 100 separate times, giving
# 100 distinct molecule objects to fingerprint.
molecules = list(
    map(Chem.MolFromSmiles, ['C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl'] * 100)
)
def rdk(molecules):
    """Build a sparse feature-by-molecule matrix of RDKit fingerprint counts.

    Every molecule is fingerprinted with a default ``GetRDKitFPGenerator()``
    through ``GetSparseCountFingerprint``; each non-zero element ``(k, v)``
    of the fingerprint of molecule ``j`` is stored at ``M[k, j]``.

    Args:
        molecules: Sized sequence of molecules accepted by
            ``GetSparseCountFingerprint`` (``len()`` is used to set the
            number of columns).

    Returns:
        A ``scipy.sparse.dok_matrix`` of shape
        ``(fp.GetLength(), len(molecules))``, or ``None`` when ``molecules``
        is empty, since no fingerprint is available to size the rows.
    """
    rdk_gen = GetRDKitFPGenerator()
    # The matrix has to be created once, before the loop: resetting it for
    # every molecule would throw away all previously filled columns and
    # leave only the last molecule's counts in the result.
    M = None
    for j, molecule in enumerate(molecules):
        fp = rdk_gen.GetSparseCountFingerprint(molecule)
        if M is None:
            # Row count is taken from the first fingerprint.
            # NOTE(review): the sparse fingerprint length may be very large;
            # confirm scipy accepts it as a matrix dimension.
            M = dok_matrix((fp.GetLength(), len(molecules)))
        for k, v in fp.GetNonzeroElements().items():
            M[k, j] = v
    return M
# Run the fingerprint-matrix builder on the example molecules.
M = rdk(molecules=molecules)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment