Created
March 8, 2019 04:05
-
-
Save ravila4/a18bb9d0e3f54de3c9fb8f75908f992f to your computer and use it in GitHub Desktop.
Utility script for generating TPATF and TPAPF fingerprints from SMILES strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Utility script for feature generation
# Md Mahmudulla Hassan
# The University of Texas at El Paso
# Last Modified: 12/19/2018
import os
import shutil
import subprocess
import tempfile

from rdkit import Chem
from rdkit.Chem import AllChem
# Root directory of the MayaChemTools installation. The fingerprint Perl
# scripts are looked up under its "bin" subdirectory; because this is a
# relative path, it resolves against the current working directory.
MAYACHEMTOOLS_DIR = "mayachemtools"
class FeatureGenerator:
    """Generate TPATF / TPAPF fingerprints for a single SMILES string.

    The molecule is written to a temporary SD file with RDKit, the matching
    MayaChemTools Perl script is run on that file, and the fingerprint values
    are read back from the CSV file the script writes next to it.
    """

    # File stem shared by the SD input and the CSV output of the Perl script.
    _FILE_ROOT = "temp"

    def __init__(self, smiles):
        """Store the SMILES string and create a scratch directory.

        Args:
            smiles: SMILES string of the molecule to featurize.
        """
        self.smiles = smiles
        self.temp_dir = tempfile.mkdtemp()

    def toString(self):
        """Return the SMILES string this generator was created with."""
        return self.smiles

    def toSDF(self):
        """Write the molecule, with 2D coordinates, to ``temp.sdf``.

        The file is created inside ``self.temp_dir``. The directory is
        re-created first if an earlier fingerprint call already removed it.

        Raises:
            ValueError: If RDKit cannot parse ``self.smiles``.
        """
        mol = Chem.MolFromSmiles(self.smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None; fail
            # here with a clear message instead of deep inside RDKit.
            raise ValueError(
                "Could not parse SMILES: {!r}".format(self.smiles))
        AllChem.Compute2DCoords(mol)
        mol.SetProp("smiles", self.smiles)
        self._ensure_temp_dir()
        writer = Chem.SDWriter(self._temp_path(".sdf"))
        try:
            writer.write(mol)
        finally:
            # Closing flushes the record and releases the file handle.
            writer.close()

    def toTPATF(self):
        """Return the fixed-size atom-triplets fingerprint values.

        Returns:
            A list of ints, an empty list if the CSV output contains no
            compound line, or None if the SD file could not be created.
        """
        return self._run_fingerprint_script(
            "TopologicalPharmacophoreAtomTripletsFingerprints.pl",
            "--AtomTripletsSetSizeToUse")

    def toTPAPF(self):
        """Return the fixed-size atom-pairs fingerprint values.

        Returns:
            A list of ints, an empty list if the CSV output contains no
            compound line, or None if the SD file could not be created.
        """
        return self._run_fingerprint_script(
            "TopologicalPharmacophoreAtomPairsFingerprints.pl",
            "--AtomPairsSetSizeToUse")

    def _run_fingerprint_script(self, script_name, size_option):
        """Run one MayaChemTools script on the molecule and parse its CSV.

        Args:
            script_name: File name of the Perl script under
                ``MAYACHEMTOOLS_DIR/bin``.
            size_option: Name of the script's set-size option, which is
                passed the value ``FixedSize``.

        Returns:
            The parsed fingerprint values, or None if the SD file is missing.
        """
        script_path = os.path.join(MAYACHEMTOOLS_DIR, "bin", script_name)
        try:
            self.toSDF()
            sdf_path = self._temp_path(".sdf")
            if not os.path.isfile(sdf_path):
                print("Error: sdf file could not be created.")
                return None
            # An argument list (no shell) keeps paths with spaces intact.
            command = [
                "perl", script_path,
                "-r", self._temp_path(""),
                size_option, "FixedSize",
                "-v", "ValuesString",
                "-o", sdf_path,
            ]
            status = subprocess.call(command)
            if status != 0:
                print("Error: {} exited with status {}.".format(
                    script_name, status))
            return self._parse_fingerprint_csv(self._temp_path(".csv"))
        finally:
            # Remove the scratch files on every exit path, including errors.
            self._cleanup()

    @staticmethod
    def _parse_fingerprint_csv(csv_path):
        """Read the fingerprint values from the script's CSV output.

        For every line containing ``Cmpd``, the sixth ``;``-separated field
        is taken, double quotes are removed, and the whitespace-separated
        tokens are converted to ints. If several lines match, the last wins.

        Args:
            csv_path: Path of the CSV file to read.

        Returns:
            A list of ints; empty if no line contains ``Cmpd``.
        """
        features = []
        with open(csv_path, "r") as handle:
            for line in handle:
                if "Cmpd" in line:
                    values = line.split(";")[5].replace('"', "")
                    features = [int(token) for token in values.split()]
        return features

    def _temp_path(self, suffix):
        """Return the path of the scratch file with the given suffix."""
        return os.path.join(self.temp_dir, self._FILE_ROOT + suffix)

    def _ensure_temp_dir(self):
        """Create a fresh scratch directory if the current one is gone."""
        if not os.path.isdir(self.temp_dir):
            self.temp_dir = tempfile.mkdtemp()

    def _cleanup(self):
        """Delete the scratch directory; safe to call more than once."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)
# Example: Extracting TPATF features
# from utility import FeatureGenerator
# ft = FeatureGenerator("O=C(Cc1ccccc1)Nc2ncc(s2)C3CCC3")
# features = ft.toTPATF()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment