Last active
July 4, 2020 11:36
-
-
Save cstein/c776deac44acd2266ade5d520d57e69c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{"name": "Imidazoles", "reactant": "[n+;H]", "product": "n"}, | |
{"name": "Amines", "reactant": "[N+;!H0]", "product": "N"}, | |
{"name": "Carboxylic acids and alcohols", "reactant": "[$([O-]);!$([O-][#7])]", "product": "O"}, | |
{"name": "Thiols", "reactant": "[S-;X1]", "product": "S"}, | |
{"name": "Sulfonamides", "reactant": "[$([N-;X2]S(=O)=O)]", "product": "N"}, | |
{"name": "Enamines", "reactant": "[$([N-;X2][C,N]=C)]", "product": "N"}, | |
{"name": "Tetrazoles", "reactant": "[n-]", "product": "[nH]"}, | |
{"name": "Sulfoxides", "reactant": "[$([S-]=O)]", "product": "S"}, | |
{"name": "Amides", "reactant": "[$([N-]C=O)]", "product": "N"} | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Functionality to neutralize molecules """ | |
import json | |
import numpy as np | |
import copy | |
from rdkit import Chem | |
from sascorer import calculateScore | |
_neutralize_reactions = None | |
def read_neutralizers(name="neutralize"): | |
filename = f"{name}.json" | |
with open(filename) as json_file: | |
reactions = json.load(json_file) | |
neutralize_reactions = [] | |
for reaction in reactions: | |
n_s = reaction["name"] | |
r_s = reaction["reactant"] | |
p_s = reaction["product"] | |
r = Chem.MolFromSmarts(r_s) | |
p = Chem.MolFromSmiles(p_s, False) | |
assert r is not None and p is not None, "Either R or P is None" | |
neutralize_reactions.append((r, p)) | |
return neutralize_reactions | |
def neutralize_smiles(smiles): | |
""" Neutralize a set of SMILES | |
:param list smiles: a list of SMILES | |
""" | |
assert type(smiles) == list | |
charged_molecules = [Chem.MolFromSmiles(s) for s in smiles] | |
neutral_molecules = neutralize_molecules(charged_molecules) | |
return [Chem.MolToSmiles(m) for m in neutral_molecules] | |
def neutralize_molecules(charged_molecules): | |
""" Neutralize a set of molecules | |
:param list charged_molecules: list of (possibly) charged molecules | |
:return: list of neutral molecules | |
""" | |
assert type(charged_molecules) == list | |
global _neutralize_reactions | |
if _neutralize_reactions is None: | |
_neutralize_reactions = read_neutralizers() | |
neutral_molecules = [] | |
for c_mol in charged_molecules: | |
mol = copy.deepcopy(c_mol) | |
#mol = Chem.Mol(c_mol) | |
#mol.UpdatePropertyCache() | |
#Chem.rdmolops.FastFindRings(mol) | |
assert mol is not None | |
for reactant_mol, product_mol in _neutralize_reactions: | |
while mol.HasSubstructMatch(reactant_mol): | |
rms = Chem.ReplaceSubstructs(mol, reactant_mol, product_mol) | |
if rms[0] is not None: | |
mol = rms[0] | |
neutral_molecules.append(mol) | |
return neutral_molecules | |
def sa_score_modifier(sa_scores, mu = 2.230044, sigma = 0.6526308): | |
""" Computes a synthesizability multiplier for a (range of) synthetic accessibility score(s) | |
The multiplier is between 1 (perfectly synthesizable) and 0 (not synthesizable). | |
Based on the work of https://arxiv.org/pdf/2002.07007 | |
:param list sa_scores: list of synthetic availability scores | |
:param float mu: average synthetic availability score | |
:param float sigma: standard deviation of the score to accept | |
""" | |
mod_scores = np.maximum(sa_scores, mu) | |
return np.exp(-0.5 * np.power((mod_scores - mu) / sigma, 2.)) | |
def reweigh_scores_by_sa(population, scores): | |
sa_scores = sa_score_modifier([calculateScore(p) for p in population]) | |
scores = [ns * sa for ns, sa in zip(scores, sa_scores)] # rescale scores and force list type | |
return scores | |
if __name__ == '__main__': | |
s_q = "c1ccccc1C(C(=O)[O-])c2ccccc2" | |
s_q = "c1cccc(c12)cc(cc2)C[NH+]([NH3+])C(=O)[NH3+]" | |
s_n = neutralize_smiles([s_q]) | |
print("Q: ", s_q, calculateScore(Chem.MolFromSmiles(s_q))) | |
print("N: ", s_n, calculateScore(Chem.MolFromSmiles(s_n[0]))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment