Skip to content

Instantly share code, notes, and snippets.

@MShafquat
Created October 25, 2020 19:05
Show Gist options
  • Save MShafquat/5376d78151919769c353385101ddd821 to your computer and use it in GitHub Desktop.
Save MShafquat/5376d78151919769c353385101ddd821 to your computer and use it in GitHub Desktop.
Read compounds from multiple SDFs and write calculations to a single SDF file
# necessary imports
import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools
from rdkit.Chem import Crippen
import glob
import pandas as pd
# Collect every unique compound found in the SDF files at or below the current
# directory, compute a handful of descriptors for each, and write the result
# to a single SDF file.

# Column order of the resulting table.
COLUMNS = ['ID', 'Smiles', 'Molecular_Formula', 'Molecular_Weight', 'H_Bond_Acceptors',
           'H_Bond_Donors', 'Molar_Refractivity', 'TPSA']

rows = []             # one dict per unique compound, in discovery order
seen_smiles = set()   # SMILES already processed, used to skip duplicates
skipped = 0           # SDF records that RDKit could not parse

# Sort the paths so the sequential IDs are reproducible between runs; glob
# itself returns files in an arbitrary, filesystem-dependent order.
# NOTE(review): with recursive=True the '**' component also matches the current
# directory itself, so a 'new_sdf.sdf' left over from a previous run is read
# back in as input. Its compounds are de-duplicated by SMILES, but confirm
# this is acceptable or write the output elsewhere.
for file in sorted(glob.iglob('./**//*.sdf', recursive=True)):
    for mol in Chem.SDMolSupplier(file):
        # The supplier yields None for records it fails to parse; passing
        # None on to MolToSmiles would abort the whole run.
        if mol is None:
            skipped += 1
            continue

        smiles = Chem.MolToSmiles(mol)
        if smiles in seen_smiles:
            continue
        seen_smiles.add(smiles)

        rows.append({
            # IDs are 1-based and zero-padded: Phytochem_00001, Phytochem_00002, ...
            'ID': 'Phytochem_' + str(len(rows) + 1).zfill(5),
            'Smiles': smiles,
            'Molecular_Formula': Chem.rdMolDescriptors.CalcMolFormula(mol),
            # NOTE: CalcExactMolWt is the exact (monoisotopic) mass.
            'Molecular_Weight': Chem.rdMolDescriptors.CalcExactMolWt(mol),
            'H_Bond_Acceptors': Chem.rdMolDescriptors.CalcNumHBA(mol),
            'H_Bond_Donors': Chem.rdMolDescriptors.CalcNumHBD(mol),
            'Molar_Refractivity': Crippen.MolMR(mol),
            'TPSA': Chem.rdMolDescriptors.CalcTPSA(mol),
        })

if skipped:
    print('Skipped ' + str(skipped) + ' unreadable SDF record(s)')

# Build the frame once from the collected rows. DataFrame.append was removed
# in pandas 2.0 and copied the whole frame on every call.
compounds = pd.DataFrame(rows, columns=COLUMNS)

# add molecule column (molecules are rebuilt from the SMILES strings)
PandasTools.AddMoleculeColumnToFrame(compounds, 'Smiles', 'ROMol', includeFingerprints=True)
# now write to a single sdf
PandasTools.WriteSDF(compounds, 'new_sdf.sdf', molColName='ROMol', idName='ID',
                     properties=list(compounds.columns))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment