Skip to content

Instantly share code, notes, and snippets.

@MichelML
Last active February 25, 2022 14:41
Show Gist options
  • Save MichelML/3a6c9b1d795d18785c743c6aec4fbb67 to your computer and use it in GitHub Desktop.
Save MichelML/3a6c9b1d795d18785c743c6aec4fbb67 to your computer and use it in GitHub Desktop.
import datamol as dm
# Example inputs borrowed from the RDKit substructure-searching tutorial:
# https://www.rdkit.org/docs/GettingStartedInPython.html#substructure-searching
test_statement_substruct = "ccO"  # substructure query used in a filter statement
test_smiles = "c1ccccc1O"  # molecule (as SMILES) the query is searched in
# Predicates keyed by a statement's "match_type" code. Every predicate is
# called positionally as (matches_count, match_n_val, match_n_second_val) and
# returns whether the observed match count satisfies the statement; only "WR"
# looks at the third argument.
_match_type_checkers = {
    # Within range: lower bound <= count <= upper bound (both inclusive).
    "WR": lambda count, low, high: low <= count and count <= high,
    # Equal to the threshold.
    "EQ": lambda count, threshold, _unused: threshold == count,
    # Strictly lower than the threshold.
    "LT": lambda count, threshold, _unused: count < threshold,
    # Lower than or equal to the threshold.
    "LTE": lambda count, threshold, _unused: count <= threshold,
    # Strictly greater than the threshold.
    "GT": lambda count, threshold, _unused: count > threshold,
    # Greater than or equal to the threshold.
    "GTE": lambda count, threshold, _unused: count >= threshold,
}
# Maps a statement's "chaining_operator" value to the matching Python
# boolean-operator source fragment (note the leading space). A statement with
# no operator (None) maps to the empty string.
_bool_operators_map = {
    "AND": " and",
    "OR": " or",
    None: "",
}
def is_matching_filter_statement(input_mol: dm.Mol, statement):
    """Evaluate a single statement of a chemical filter against a molecule.

    The statement's structure is searched in ``input_mol``; the number of
    substructure matches is then compared with the statement's threshold(s)
    using the predicate selected by ``match_type``.

    Args:
        input_mol: molecule in which the substructure is searched.
        statement: mapping describing the statement, with the keys
            ``"logic_type"`` (``"include"`` or ``"exclude"``),
            ``"structure"`` (query string),
            ``"is_smarts"`` (truthy: parse the structure with
            ``dm.from_smarts``; falsy: parse it with ``dm.to_mol``),
            ``"match_type"`` (a key of ``_match_type_checkers``),
            ``"match_n_val"`` (threshold, or lower bound for a range) and,
            optionally, ``"match_n_second_val"`` (upper bound for a range).

    Returns:
        bool: for an ``"include"`` statement, whether the match count
        satisfies the comparison; for an ``"exclude"`` statement, the negation.

    Raises:
        KeyError: if a required key is missing from ``statement``.
        ValueError: if the match type or logic type is unknown, if the
            structure cannot be parsed, or if the thresholds cannot be
            compared with the match count (e.g. a range without its upper
            bound).
    """
    logic_type = statement["logic_type"]  # include/exclude
    substruct = statement["structure"]
    is_smarts = statement["is_smarts"]
    # within range/equal/lower than/lower or equal than/greater than/
    # greater or equal than
    match_type = statement["match_type"]
    match_n_val = statement["match_n_val"]
    match_n_second_val = statement.get("match_n_second_val")  # maybe None

    # Validate the statement before doing any substructure search, and only
    # translate the lookup failure: a bare ``except`` around the predicate call
    # would mislabel unrelated errors as "match type does not exist".
    try:
        checker = _match_type_checkers[match_type]
    except (KeyError, TypeError):
        raise ValueError(
            f"Match type specified, {match_type}, does not exist"
        ) from None
    if logic_type not in ("include", "exclude"):
        raise ValueError(f"Logic type specified, {logic_type}, does not exist")

    qmol = dm.from_smarts(substruct) if is_smarts else dm.to_mol(substruct)
    if qmol is None:
        # NOTE(review): both parsers are expected to return None on invalid
        # input rather than raise — confirm against the datamol version in use.
        raise ValueError(f"Structure specified, {substruct}, could not be parsed")

    matches_count = len(input_mol.GetSubstructMatches(qmol))

    try:
        is_matching = checker(matches_count, match_n_val, match_n_second_val)
    except TypeError as err:
        # Typically a None/non-numeric threshold, e.g. "WR" without an upper bound.
        raise ValueError(
            f"Match type {match_type} cannot be evaluated with "
            f"match_n_val={match_n_val!r} and "
            f"match_n_second_val={match_n_second_val!r}"
        ) from err

    return is_matching if logic_type == "include" else not is_matching
def is_matching_filter(input_mol: dm.Mol, statements=None):
    """Evaluate a flat list of filter statements against a molecule.

    Each statement is evaluated with ``is_matching_filter_statement`` and the
    results are combined left to right using each statement's
    ``"chaining_operator"`` (``"AND"`` / ``"OR"``), which links it to the
    statement that follows. There is no nested logic; as in a Python boolean
    expression, ``AND`` binds tighter than ``OR``, so
    ``A AND B OR C`` is ``(A and B) or C``.

    Args:
        input_mol: molecule to test.
        statements: list of statement mappings; ``None`` or an empty list
            means "no constraint".

    Returns:
        bool: True if the molecule passes the chemical filter, False if not.

    Raises:
        ValueError: if a chaining operator is unknown, or is missing between
            two statements. Errors raised while evaluating an individual
            statement propagate unchanged.
    """
    statements = list(statements or ())
    if not statements:
        return True

    # The chain is evaluated directly instead of building a source string and
    # calling eval(). "A and B or C" is held as [[A, B], [C]]: the filter
    # passes when every result of at least one AND-group is true.
    and_groups = [[]]
    last_index = len(statements) - 1
    for index, statement in enumerate(statements):
        and_groups[-1].append(is_matching_filter_statement(input_mol, statement))

        operator = statement.get("chaining_operator")
        if operator not in _bool_operators_map:
            raise ValueError(
                f"Chaining operator specified, {operator}, does not exist"
            )
        if index == last_index:
            # Nothing follows the last statement, so its operator (if any)
            # has nothing to chain to and is ignored.
            break
        if operator is None:
            raise ValueError(
                f"Chaining operator missing between statements {index} and {index + 1}"
            )
        if operator == "OR":
            and_groups.append([])

    return any(all(group) for group in and_groups)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment