john-bradshaw/split_reagents.py

## split_reagents.py
"""
Module for identifying for RDKit functions working with reaction SMILES strings.
"""


import collections
import itertools
import functools
import typing

from rdkit import Chem


class InconsistentSMILES(RuntimeError):
    pass


def get_atom_map_nums(mol_str) -> typing.Iterator[int]:
    """
    :return: iterator of the atom mapping numbers of the atoms in the reaction string
    """
    mol = Chem.MolFromSmiles(mol_str)
    return (int(a.GetProp('molAtomMapNumber')) for a in mol.GetAtoms() if a.HasProp('molAtomMapNumber'))


def _not_unique_elements(itr_in) -> bool:
    lst_ = list(itr_in)
    return True if len(lst_) != len(set(lst_)) else False


def get_changed_bonds(rxn_smi, strict_mode=False) -> typing.List[typing.Tuple[int, int, float]]:
    """
    Note unless `strict_mode` is `True`, this method does not check that the reagents have been seperated correctly
    (i.e., reagent atoms are not in the products) or that atom map numbers have been repeated in the reactants or
    products, which would break the rest of the implementation in a silent manner. If `strict_mode` is `True` and these
    conditions are violated then an `InconsistentSMILES` is raised.

    API should match the alternative:
    https://github.com/connorcoley/rexgen_direct/blob/master/rexgen_direct/scripts/prep_data.py

    :return: list of tuples of atom map numbers for each bond and new bond order
    """
    # 1. Split into reactants and products.
    reactants_smi, reagents_smi, products_smi = rxn_smi.split('>')

    # 2. If necessary check for repeated atom map numbers in the SMILES and products.
    if strict_mode:
        if _not_unique_elements(get_atom_map_nums(reactants_smi)):
            raise InconsistentSMILES("Repeated atom map numbers in reactants")
        if _not_unique_elements(get_atom_map_nums(products_smi)):
            raise InconsistentSMILES("Repeated atom maps numbers in products")

    # 3. Get the bonds and their types in reactants and products
    bonds_prev = {}
    bonds_new = {}
    for bond_dict, bonds in [(bonds_prev, Chem.MolFromSmiles(reactants_smi).GetBonds()),
                             (bonds_new,  Chem.MolFromSmiles(products_smi).GetBonds())]:
        for bond in bonds:
            try:
                bond_atmmap = frozenset((int(bond.GetBeginAtom().GetProp('molAtomMapNumber')),
                                         int(bond.GetEndAtom().GetProp('molAtomMapNumber'))))
            except KeyError:
                continue
            bond_dict[bond_atmmap] = float(bond.GetBondTypeAsDouble())

    # 4. Go through the bonds before and after...
    bond_changes: typing.List[typing.Tuple[int, int, float]] = []
    product_atmmap_nums = set(get_atom_map_nums(products_smi))
    if strict_mode and (len(set(get_atom_map_nums(reagents_smi)) & product_atmmap_nums) > 0):
        raise InconsistentSMILES("Reagent atoms end up in products.")
    for bnd in {*bonds_prev, *bonds_new}:
        bnd_different_btwn_reacts_and_products = not (bonds_prev.get(bnd, None) == bonds_new.get(bnd, None))
        bnd_missing_in_products = len(bnd & product_atmmap_nums) == 0

        # ... and if a bond has (a) changed or (b) is half missing in the products then it must have changed!
        if bnd_different_btwn_reacts_and_products and (not bnd_missing_in_products):
            bond_changes.append((*sorted(list(bnd)), bonds_new.get(bnd, 0.0)))
            # ^ note if no longer in products then new order is 0.

    return bond_changes


def canonicalize(smiles, remove_atm_mapping=True, **otherargs) -> str:
    """
    :param smiles: SMILES string to be canonicalized
    :param remove_atm_mapping: whether to remove the atom map number (if applicable from molecule), note that this
    affects canonicalization.
    :param otherargs: arguments to pass to Chem.MolToSmiles
    :return: canonicalized SMILES string
    """
    mol = Chem.MolFromSmiles(smiles)
    return canonicalize_from_molecule(mol, remove_atm_mapping, **otherargs)


def canonicalize_from_molecule(mol, remove_atm_mapping=True, **otherargs) -> str:
    """
    :param mol: RDKit molecule to canonicalize.
    :param remove_atm_mapping: whether to remove the atom map number (if applicable from molecule), note that this
    affects canonicalization.
    :param otherargs: arguments to pass to Chem.MolToSmiles
    :return: canonicalized SMILES string
    """
    mol_copy = Chem.RWMol(mol)
    if remove_atm_mapping:
        for atom in mol_copy.GetAtoms():
            atom.ClearProp('molAtomMapNumber')
    smiles = Chem.MolToSmiles(mol_copy, canonical=True, **otherargs)
    return smiles


def split_reagents_out_from_reactants_and_products(rxn_smi, strict_mode=False) -> typing.Tuple[str, str, str]:
    """
    Splits reaction into reactants, reagents, and products. Can deal with reagents in reactants part of SMILES string.
    Note that this method expects relatively well done atom mapping.

    Reagent defined as either:
    1. in the middle part of reaction SMILES string, i.e. inbetween the `>` tokens.
    2. in the reactants part of the SMILES string and all of these are true:
            a. no atoms in the product(s).
            b. not involved in the reaction center (atoms for which bonds change before and after) -- depending on the
                center identification code this will be covered by a, but is also checked to allow for cases where
                center can include information about changes in say a reactant that results in two undocumented minor
                products.
            c. reaction has been atom mapped (i.e., can we accurately check conditions a and b) -- currently judged by
                a center being able to be identified.
    3. in the reactants and products part of the SMILES string and both:
            a. not involved in reaction center
            b. unchanged before and after the reaction (comparing with canonicalized, atom-map removed strings)
    :param rxn_smi: the reaction SMILES string
    :param strict_mode: whether to run `get_changed_bonds` in strict mode when determining atom map numbers involved in
                        center.
    :return: tuple of reactants, reagents, and products
    """


    # 1. Split up reaction and get involved atom counts.
    reactant_all_str, reagents_all_str, product_all_str = rxn_smi.split('>')
    atoms_involved_in_reaction = set(
        itertools.chain(*[(int(el[0]), int(el[1])) for el in get_changed_bonds(rxn_smi, strict_mode)]))
    reactants_str = reactant_all_str.split('.')
    products_str = product_all_str.split('.')
    products_to_keep = collections.Counter(products_str)
    product_atom_map_nums = functools.reduce(lambda x, y: x | y, (set(get_atom_map_nums(prod)) for prod in products_str))
    reaction_been_atom_mapped = len(atoms_involved_in_reaction) > 0

    # 2. Store map from canonical products to multiset of their SMILES in the products --> we will class
    canon_products_to_orig_prods = collections.defaultdict(collections.Counter)
    for smi in products_str:
        canon_products_to_orig_prods[canonicalize(smi)].update([smi])


    # 3. Go through the remaining reactants and check for conditions 2 or 3.
    reactants = []
    reagents = reagents_all_str.split('.')
    for candidate_reactant in reactants_str:
        atom_map_nums_in_candidate_reactant = set(get_atom_map_nums(candidate_reactant))

        # compute some flags useful for checks 2 and 3
        # 2a any atoms in products
        not_in_product = len(list(product_atom_map_nums & atom_map_nums_in_candidate_reactant)) == 0
        # 2b/3a any atoms in reaction center
        not_in_center = len(list(set(atoms_involved_in_reaction & atom_map_nums_in_candidate_reactant))) == 0

        # Check 2.
        if (reaction_been_atom_mapped and not_in_product and not_in_center):
            reagents.append(candidate_reactant)
            continue

        # Check 3.
        canonical_reactant = canonicalize(candidate_reactant)
        reactant_possibly_unchanged_in_products = canonical_reactant in canon_products_to_orig_prods
        # ^ possibly as it could be different when we include atom maps -- we will check for this later.
        if not_in_center and reactant_possibly_unchanged_in_products:

            # We also need to match this reactant up with the appropriate product SMILES string and remove this from
            # the product.  To do this we shall go through the possible product SMILES strings.
            possible_prod = None
            for prod in canon_products_to_orig_prods[canonical_reactant]:

                # if the atom mapped numbers intersect then this must be the product we are after and can break!
                if len(set(get_atom_map_nums(prod)) & set(get_atom_map_nums(candidate_reactant))) > 0:
                    break

                # if the product in the reaction SMILES has no atom map numbers it _could_ match but check other
                # possibilities first to see if we get an atom map match.
                if len(list(get_atom_map_nums(prod))) == 0:
                    possible_prod = prod
            else:
                prod = possible_prod  # <-- if we are here then we did not get an exact atom map match

            if prod is not None:
                # ^ if it is still None then a false alarm and not the same molecule due to atom map numbers.
                # (we're going to defer to atom map numbers and assume they're right!)
                reagents.append(candidate_reactant)
                products_to_keep.subtract([prod])  # remove it from the products too

                # we also need to do some book keeping on our datastructure mapping canonical SMILES to product strings
                # to indicate that we have removed one.
                canon_products_to_orig_prods[canonical_reactant].subtract([prod])
                canon_products_to_orig_prods[canonical_reactant] += collections.Counter()
                # ^ remove zero and negative values
                if len(canon_products_to_orig_prods[canonical_reactant]) == 0:
                    del canon_products_to_orig_prods[canonical_reactant]

                continue

        # if passed check 2 and 3 then it is a reactant!
        reactants.append(candidate_reactant)

    product_all_str = '.'.join(products_to_keep.elements())
    return '.'.join(reactants), '.'.join(reagents), product_all_str


if __name__ == '__main__':
    examples = [
        (
            "[Br:6][c:7]1[c:8]([SH:13])[cH:9][cH:10][cH:11][cH:12]1.[C:14](=[O:15])([O-:16])[O-:17].[CH2:20]([CH3:21])[O:22]"
            "[CH:23]([CH2:24][Br:25])[O:26][CH2:27][CH3:28].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34].[K+:18].[K+:19].[O:1]"
            "=[CH:2][N:3]([CH3:4])[CH3:5].[OH2:35]>>[Br:6][c:7]1[c:8]([S:13][CH2:24][CH:23]([O:22][CH2:20][CH3:21])[O:26]"
            "[CH2:27][CH3:28])[cH:9][cH:10][cH:11][cH:12]1"),

        # Like above but demonstrating that unchanged molecules in products can also be classed as reagents:
        (
            "[Br:6][c:7]1[c:8]([SH:13])[cH:9][cH:10][cH:11][cH:12]1.[C:14](=[O:15])([O-:16])[O-:17].[CH2:20]([CH3:21])[O:22]"
            "[CH:23]([CH2:24][Br:25])[O:26][CH2:27][CH3:28].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34].[K+:18].[K+:19].[O:1]"
            "=[CH:2][N:3]([CH3:4])[CH3:5].[OH2:35]>>[Br:6][c:7]1[c:8]([S:13][CH2:24][CH:23]([O:22][CH2:20][CH3:21])[O:26]"
            "[CH2:27][CH3:28])[cH:9][cH:10][cH:11][cH:12]1.[K+:19].[K+:18].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34]"),

        (
            "[C:34](=[O:35])([OH:36])[O-:37].[CH2:1]([CH3:2])[N:3]([c:4]1[c:5](-[c:12]2[n:13]([CH3:23])[c:14]3[cH:15][c:16]"
            "([O:21][CH3:22])[cH:17][cH:18][c:19]3[cH:20]2)[cH:6][cH:7][c:8]([O:10][CH3:11])[cH:9]1)[CH2:24][CH3:25]"
            ".[Cl:26][N:27]1[C:28](=[O:29])[CH2:30][CH2:31][C:32]1=[O:33].[Na+:38]>[O:39]1[CH2:40][CH2:41][CH2:42][CH2:43]1"
            ">[CH2:1]([CH3:2])[N:3]([c:4]1[c:5](-[c:12]2[n:13]([CH3:23])[c:14]3[cH:15][c:16]([O:21][CH3:22])[cH:17][cH:18]"
            "[c:19]3[c:20]2[Cl:26])[cH:6][cH:7][c:8]([O:10][CH3:11])[cH:9]1)[CH2:24][CH3:25]")
    ]

    print("# Identifying centers")
    for eg in examples:
        print(eg)
        print(get_changed_bonds(eg))
        print()

    print("\n\n# Splitting reagents out")
    for eg in examples:
        print(eg)
        print('>'.join(split_reagents_out_from_reactants_and_products(eg)))
        print()

    print("Strict mode should fail with repeated atom maps...")
    smiles = "[CH:1][N:2]([CH3:3])[CH3:1].[OH2:5]>>[CH:1][N:2]([CH3:3])[CH3:1][OH2:5]"
    try:
        get_changed_bonds(smiles, strict_mode=True)
    except Exception as ex:
        print("Yes, exception thrown:", ex)
    else:
        print("No?!")
	"""
	Module for identifying for RDKit functions working with reaction SMILES strings.
	"""


	import collections
	import itertools
	import functools
	import typing

	from rdkit import Chem


	class InconsistentSMILES(RuntimeError):
	pass


	def get_atom_map_nums(mol_str) -> typing.Iterator[int]:
	"""
	:return: iterator of the atom mapping numbers of the atoms in the reaction string
	"""
	mol = Chem.MolFromSmiles(mol_str)
	return (int(a.GetProp('molAtomMapNumber')) for a in mol.GetAtoms() if a.HasProp('molAtomMapNumber'))


	def _not_unique_elements(itr_in) -> bool:
	lst_ = list(itr_in)
	return True if len(lst_) != len(set(lst_)) else False


	def get_changed_bonds(rxn_smi, strict_mode=False) -> typing.List[typing.Tuple[int, int, float]]:
	"""
	Note unless `strict_mode` is `True`, this method does not check that the reagents have been seperated correctly
	(i.e., reagent atoms are not in the products) or that atom map numbers have been repeated in the reactants or
	products, which would break the rest of the implementation in a silent manner. If `strict_mode` is `True` and these
	conditions are violated then an `InconsistentSMILES` is raised.

	API should match the alternative:
	https://github.com/connorcoley/rexgen_direct/blob/master/rexgen_direct/scripts/prep_data.py

	:return: list of tuples of atom map numbers for each bond and new bond order
	"""
	# 1. Split into reactants and products.
	reactants_smi, reagents_smi, products_smi = rxn_smi.split('>')

	# 2. If necessary check for repeated atom map numbers in the SMILES and products.
	if strict_mode:
	if _not_unique_elements(get_atom_map_nums(reactants_smi)):
	raise InconsistentSMILES("Repeated atom map numbers in reactants")
	if _not_unique_elements(get_atom_map_nums(products_smi)):
	raise InconsistentSMILES("Repeated atom maps numbers in products")

	# 3. Get the bonds and their types in reactants and products
	bonds_prev = {}
	bonds_new = {}
	for bond_dict, bonds in [(bonds_prev, Chem.MolFromSmiles(reactants_smi).GetBonds()),
	(bonds_new, Chem.MolFromSmiles(products_smi).GetBonds())]:
	for bond in bonds:
	try:
	bond_atmmap = frozenset((int(bond.GetBeginAtom().GetProp('molAtomMapNumber')),
	int(bond.GetEndAtom().GetProp('molAtomMapNumber'))))
	except KeyError:
	continue
	bond_dict[bond_atmmap] = float(bond.GetBondTypeAsDouble())

	# 4. Go through the bonds before and after...
	bond_changes: typing.List[typing.Tuple[int, int, float]] = []
	product_atmmap_nums = set(get_atom_map_nums(products_smi))
	if strict_mode and (len(set(get_atom_map_nums(reagents_smi)) & product_atmmap_nums) > 0):
	raise InconsistentSMILES("Reagent atoms end up in products.")
	for bnd in {bonds_prev, bonds_new}:
	bnd_different_btwn_reacts_and_products = not (bonds_prev.get(bnd, None) == bonds_new.get(bnd, None))
	bnd_missing_in_products = len(bnd & product_atmmap_nums) == 0

	# ... and if a bond has (a) changed or (b) is half missing in the products then it must have changed!
	if bnd_different_btwn_reacts_and_products and (not bnd_missing_in_products):
	bond_changes.append((*sorted(list(bnd)), bonds_new.get(bnd, 0.0)))
	# ^ note if no longer in products then new order is 0.

	return bond_changes


	def canonicalize(smiles, remove_atm_mapping=True, **otherargs) -> str:
	"""
	:param smiles: SMILES string to be canonicalized
	:param remove_atm_mapping: whether to remove the atom map number (if applicable from molecule), note that this
	affects canonicalization.
	:param otherargs: arguments to pass to Chem.MolToSmiles
	:return: canonicalized SMILES string
	"""
	mol = Chem.MolFromSmiles(smiles)
	return canonicalize_from_molecule(mol, remove_atm_mapping, **otherargs)


	def canonicalize_from_molecule(mol, remove_atm_mapping=True, **otherargs) -> str:
	"""
	:param mol: RDKit molecule to canonicalize.
	:param remove_atm_mapping: whether to remove the atom map number (if applicable from molecule), note that this
	affects canonicalization.
	:param otherargs: arguments to pass to Chem.MolToSmiles
	:return: canonicalized SMILES string
	"""
	mol_copy = Chem.RWMol(mol)
	if remove_atm_mapping:
	for atom in mol_copy.GetAtoms():
	atom.ClearProp('molAtomMapNumber')
	smiles = Chem.MolToSmiles(mol_copy, canonical=True, **otherargs)
	return smiles


	def split_reagents_out_from_reactants_and_products(rxn_smi, strict_mode=False) -> typing.Tuple[str, str, str]:
	"""
	Splits reaction into reactants, reagents, and products. Can deal with reagents in reactants part of SMILES string.
	Note that this method expects relatively well done atom mapping.

	Reagent defined as either:
	1. in the middle part of reaction SMILES string, i.e. inbetween the `>` tokens.
	2. in the reactants part of the SMILES string and all of these are true:
	a. no atoms in the product(s).
	b. not involved in the reaction center (atoms for which bonds change before and after) -- depending on the
	center identification code this will be covered by a, but is also checked to allow for cases where
	center can include information about changes in say a reactant that results in two undocumented minor
	products.
	c. reaction has been atom mapped (i.e., can we accurately check conditions a and b) -- currently judged by
	a center being able to be identified.
	3. in the reactants and products part of the SMILES string and both:
	a. not involved in reaction center
	b. unchanged before and after the reaction (comparing with canonicalized, atom-map removed strings)
	:param rxn_smi: the reaction SMILES string
	:param strict_mode: whether to run `get_changed_bonds` in strict mode when determining atom map numbers involved in
	center.
	:return: tuple of reactants, reagents, and products
	"""


	# 1. Split up reaction and get involved atom counts.
	reactant_all_str, reagents_all_str, product_all_str = rxn_smi.split('>')
	atoms_involved_in_reaction = set(
	itertools.chain(*[(int(el[0]), int(el[1])) for el in get_changed_bonds(rxn_smi, strict_mode)]))
	reactants_str = reactant_all_str.split('.')
	products_str = product_all_str.split('.')
	products_to_keep = collections.Counter(products_str)
	product_atom_map_nums = functools.reduce(lambda x, y: x \| y, (set(get_atom_map_nums(prod)) for prod in products_str))
	reaction_been_atom_mapped = len(atoms_involved_in_reaction) > 0

	# 2. Store map from canonical products to multiset of their SMILES in the products --> we will class
	canon_products_to_orig_prods = collections.defaultdict(collections.Counter)
	for smi in products_str:
	canon_products_to_orig_prods[canonicalize(smi)].update([smi])


	# 3. Go through the remaining reactants and check for conditions 2 or 3.
	reactants = []
	reagents = reagents_all_str.split('.')
	for candidate_reactant in reactants_str:
	atom_map_nums_in_candidate_reactant = set(get_atom_map_nums(candidate_reactant))

	# compute some flags useful for checks 2 and 3
	# 2a any atoms in products
	not_in_product = len(list(product_atom_map_nums & atom_map_nums_in_candidate_reactant)) == 0
	# 2b/3a any atoms in reaction center
	not_in_center = len(list(set(atoms_involved_in_reaction & atom_map_nums_in_candidate_reactant))) == 0

	# Check 2.
	if (reaction_been_atom_mapped and not_in_product and not_in_center):
	reagents.append(candidate_reactant)
	continue

	# Check 3.
	canonical_reactant = canonicalize(candidate_reactant)
	reactant_possibly_unchanged_in_products = canonical_reactant in canon_products_to_orig_prods
	# ^ possibly as it could be different when we include atom maps -- we will check for this later.
	if not_in_center and reactant_possibly_unchanged_in_products:

	# We also need to match this reactant up with the appropriate product SMILES string and remove this from
	# the product. To do this we shall go through the possible product SMILES strings.
	possible_prod = None
	for prod in canon_products_to_orig_prods[canonical_reactant]:

	# if the atom mapped numbers intersect then this must be the product we are after and can break!
	if len(set(get_atom_map_nums(prod)) & set(get_atom_map_nums(candidate_reactant))) > 0:
	break

	# if the product in the reaction SMILES has no atom map numbers it _could_ match but check other
	# possibilities first to see if we get an atom map match.
	if len(list(get_atom_map_nums(prod))) == 0:
	possible_prod = prod
	else:
	prod = possible_prod # <-- if we are here then we did not get an exact atom map match

	if prod is not None:
	# ^ if it is still None then a false alarm and not the same molecule due to atom map numbers.
	# (we're going to defer to atom map numbers and assume they're right!)
	reagents.append(candidate_reactant)
	products_to_keep.subtract([prod]) # remove it from the products too

	# we also need to do some book keeping on our datastructure mapping canonical SMILES to product strings
	# to indicate that we have removed one.
	canon_products_to_orig_prods[canonical_reactant].subtract([prod])
	canon_products_to_orig_prods[canonical_reactant] += collections.Counter()
	# ^ remove zero and negative values
	if len(canon_products_to_orig_prods[canonical_reactant]) == 0:
	del canon_products_to_orig_prods[canonical_reactant]

	continue

	# if passed check 2 and 3 then it is a reactant!
	reactants.append(candidate_reactant)

	product_all_str = '.'.join(products_to_keep.elements())
	return '.'.join(reactants), '.'.join(reagents), product_all_str


	if __name__ == '__main__':
	examples = [
	(
	"[Br:6][c:7]1[c:8]([SH:13])[cH:9][cH:10][cH:11][cH:12]1.[C:14](=[O:15])([O-:16])[O-:17].[CH2:20]([CH3:21])[O:22]"
	"[CH:23]([CH2:24][Br:25])[O:26][CH2:27][CH3:28].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34].[K+:18].[K+:19].[O:1]"
	"=[CH:2][N:3]([CH3:4])[CH3:5].[OH2:35]>>[Br:6][c:7]1[c:8]([S:13][CH2:24][CH:23]([O:22][CH2:20][CH3:21])[O:26]"
	"[CH2:27][CH3:28])[cH:9][cH:10][cH:11][cH:12]1"),

	# Like above but demonstrating that unchanged molecules in products can also be classed as reagents:
	(
	"[Br:6][c:7]1[c:8]([SH:13])[cH:9][cH:10][cH:11][cH:12]1.[C:14](=[O:15])([O-:16])[O-:17].[CH2:20]([CH3:21])[O:22]"
	"[CH:23]([CH2:24][Br:25])[O:26][CH2:27][CH3:28].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34].[K+:18].[K+:19].[O:1]"
	"=[CH:2][N:3]([CH3:4])[CH3:5].[OH2:35]>>[Br:6][c:7]1[c:8]([S:13][CH2:24][CH:23]([O:22][CH2:20][CH3:21])[O:26]"
	"[CH2:27][CH3:28])[cH:9][cH:10][cH:11][cH:12]1.[K+:19].[K+:18].[CH3:29][CH2:30][O:31][C:32](=[O:33])[CH3:34]"),

	(
	"[C:34](=[O:35])([OH:36])[O-:37].[CH2:1]([CH3:2])[N:3]([c:4]1[c:5](-[c:12]2[n:13]([CH3:23])[c:14]3[cH:15][c:16]"
	"([O:21][CH3:22])[cH:17][cH:18][c:19]3[cH:20]2)[cH:6][cH:7][c:8]([O:10][CH3:11])[cH:9]1)[CH2:24][CH3:25]"
	".[Cl:26][N:27]1[C:28](=[O:29])[CH2:30][CH2:31][C:32]1=[O:33].[Na+:38]>[O:39]1[CH2:40][CH2:41][CH2:42][CH2:43]1"
	">[CH2:1]([CH3:2])[N:3]([c:4]1[c:5](-[c:12]2[n:13]([CH3:23])[c:14]3[cH:15][c:16]([O:21][CH3:22])[cH:17][cH:18]"
	"[c:19]3[c:20]2[Cl:26])[cH:6][cH:7][c:8]([O:10][CH3:11])[cH:9]1)[CH2:24][CH3:25]")
	]

	print("# Identifying centers")
	for eg in examples:
	print(eg)
	print(get_changed_bonds(eg))
	print()

	print("\n\n# Splitting reagents out")
	for eg in examples:
	print(eg)
	print('>'.join(split_reagents_out_from_reactants_and_products(eg)))
	print()

	print("Strict mode should fail with repeated atom maps...")
	smiles = "[CH:1][N:2]([CH3:3])[CH3:1].[OH2:5]>>[CH:1][N:2]([CH3:3])[CH3:1][OH2:5]"
	try:
	get_changed_bonds(smiles, strict_mode=True)
	except Exception as ex:
	print("Yes, exception thrown:", ex)
	else:
	print("No?!")