Created
December 10, 2022 13:40
-
-
Save marcosfelt/f50afda527c539c476af2aa3d085edf5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (C) 2003 Rational Discovery LLC | |
// This file is part of the RDKit. | |
// The contents are covered by the terms of the BSD license | |
// which is included in the file license.txt, found at the root | |
// of the RDKit source tree. | |
// NOTES: | |
// - larger functional groups should come before smaller ones involving | |
// the same core atom | |
// - the first atom in the SMARTS query is not part of the group itself | |
// - it is essential that the fields in this file be separated by tabs | |
// (not spaces). | |
// | |
// Label SMARTS Notes | |
//-------------------------------------------------- | |
-NC(=O)CH3 *-[N;D2]-[C;D3](=O)-[C;D1;H3] methyl amide | |
-C(=O)O *-C(=O)[O;D1] carboxylic acids | |
-C(=O)OMe *-C(=O)[O;D2]-[C;D1;H3] carbonyl methyl ester | |
-C(=O)H *-C(=O)-[C;D1] terminal aldehyde | |
-C(=O)N *-C(=O)-[N;D1] amide | |
-C(=O)CH3 *-C(=O)-[C;D1;H3] carbonyl methyl | |
-N=C=O *-[N;D2]=[C;D2]=[O;D1] isocyanate | |
-N=C=S *-[N;D2]=[C;D2]=[S;D1] isothiocyanate | |
//-------------------------------------------------- | |
// Nitrogen containing | |
-NO2 *-[N;D3](=[O;D1])[O;D1] nitro | |
-N=O *-[N;R0]=[O;D1] nitroso | |
=N-O *=[N;R0]-[O;D1] oximes | |
=NCH3 *=[N;R0]-[C;D1;H3] Imines | |
-N=CH2 *-[N;R0]=[C;D1;H2] Imines | |
-N=NCH3 *-[N;D2]=[N;D2]-[C;D1;H3] terminal azo | |
-N=N *-[N;D2]=[N;D1] hydrazines | |
-N#N *-[N;D2]#[N;D1] diazo | |
-C#N *-[C;D2]#[N;D1] cyano | |
//-------------------------------------------------- | |
// S containing | |
-SO2NH2 *-[S;D4](=[O;D1])(=[O;D1])-[N;D1] primary sulfonamide | |
-NHSO2CH3 *-[N;D2]-[S;D4](=[O;D1])(=[O;D1])-[C;D1;H3] methyl sulfonamide | |
-SO3H *-[S;D4](=O)(=O)-[O;D1] sulfonic acid | |
-SO3CH3 *-[S;D4](=O)(=O)-[O;D2]-[C;D1;H3] methyl ester sulfonyl | |
-SO2CH3 *-[S;D4](=O)(=O)-[C;D1;H3] methyl sulfonyl | |
-SO2Cl *-[S;D4](=O)(=O)-[Cl] sulfonyl chloride | |
-SOCH3 *-[S;D3](=O)-[C;D1] methyl sulfinyl | |
-SCH3 *-[S;D2]-[C;D1;H3] methylthio | |
-S *-[S;D1] thiols | |
=S *=[S;D1] thiocarbonyls | |
//-------------------------------------------------- | |
//Miscellaneous fragments: | |
-X *-[#9,#17,#35,#53] halogens | |
-tBu *-[C;D4]([C;D1])([C;D1])-[C;D1] t-butyl | |
-CF3 *-[C;D4](F)(F)F trifluoromethyl | |
-C#CH *-[C;D2]#[C;D1;H] acetylenes | |
-cPropyl *-[C;D3]1-[C;D2]-[C;D2]1 cyclopropyl | |
//-------------------------------------------------- | |
//Really teeny stuff: | |
-OEt *-[O;D2]-[C;D2]-[C;D1;H3] ethoxy | |
-OMe *-[O;D2]-[C;D1;H3] methoxy | |
-O *-[O;D1] side-chain hydroxyls | |
=O *=[O;D1] side-chain aldehydes or ketones | |
-N *-[N;D1] primary amines | |
=N *=[N;D1] ??? | |
#N *#[N;D1] nitriles |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rdkit import Chem | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# Get fragments
# The table is tab-separated with three columns (label, SMARTS, notes);
# lines starting with "//" are comments and are skipped via comment="/".
fragments_df = pd.read_csv(
    "fragments.txt",
    sep="\t",
    comment="/",
    names=["label", "smarts", "name"],
)

# Names are not unique in the table (e.g. "Imines" is used for two different
# patterns), so keying on the bare name would silently drop all but the last
# pattern sharing it. Repeated names are qualified with their (unique) label;
# names that occur once keep their original key.
name_is_repeated = fragments_df["name"].duplicated(keep=False)
fragments = {}
for label, smarts, name, repeated in zip(
    fragments_df["label"],
    fragments_df["smarts"],
    fragments_df["name"],
    name_is_repeated,
):
    key = f"{name} ({label})" if repeated else name
    fragments[key] = Chem.MolFromSmarts(smarts)
def get_functional_groups(mol, fragments):
    """Count how many times each fragment pattern matches ``mol``.

    Args:
        mol: Object exposing ``GetSubstructMatches(pattern)``.
        fragments: Mapping from a key to the pattern queried for that key.

    Returns:
        A dict with the same keys (in the same order) as ``fragments``,
        whose values are the number of matches returned for each pattern.
    """
    counts = {}
    for key, pattern in fragments.items():
        matches = mol.GetSubstructMatches(pattern)
        counts[key] = len(matches)
    return counts
# Get functional groups
# Put your list of SMILES here. Every element must be a SMILES string; the
# original "..." placeholder was a literal Ellipsis object, which is not one.
smiles_list = ["CO"]

# Parse from smiles_list (the original iterated over `mols` while defining
# it, which raises NameError).
# NOTE(review): RDKit's MolFromSmiles returns None for SMILES it cannot parse,
# which would make get_functional_groups fail below - validate inputs if
# they come from an untrusted source.
mols = [Chem.MolFromSmiles(smi) for smi in smiles_list]

# One dict of {fragment key: match count} per molecule, then one row per
# molecule and one column per fragment key.
functional_groups = [get_functional_groups(mol, fragments) for mol in mols]
functional_group_df = pd.DataFrame(functional_groups)
# Make figure
# `sums` was never defined in the original script. Total each fragment's
# match count over all molecules; sorting in descending order is assumed to
# be the intent so that the first ten entries are the most frequent groups.
sums = functional_group_df.sum().sort_values(ascending=False)
top_groups = sums.head(10)

fig, ax = plt.subplots(1)
# No `rot=` here: the tick-label rotation is set explicitly below and would
# override it anyway.
top_groups.plot.bar(ax=ax)
ax.tick_params(direction="in")
ax.set_ylabel("Counts")
ax.set_xticklabels(top_groups.index, rotation=30, ha="right")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment