Skip to content

Instantly share code, notes, and snippets.

@matteoferla
Created May 23, 2024 13:54
Show Gist options
  • Save matteoferla/49ebe1ef747162dc2e8d39f1f047f5e3 to your computer and use it in GitHub Desktop.
Save matteoferla/49ebe1ef747162dc2e8d39f1f047f5e3 to your computer and use it in GitHub Desktop.
chemical isomorphism
"""
Chemical isomorphism refers to the phenomenon where elements of similar electron density,
such as carbon (C), nitrogen (N), and oxygen (O), cannot be easily distinguished in the electron density map.
This is because these atoms scatter X-rays similarly, resulting in overlapping or indistinguishable electron density.
This isomorphism is also called `crystallographic ambiguity`.
.. code-block:: python
isomorphics: List[Chem.Mol] = get_chemical_isomorphisms(hit)
"""
from typing import List, Dict
from arthorian_quest import enquire # used simply to make all Chem.Atom instances QueryAtom instances
from rdkit import Chem
def get_chemical_isomorphisms(mol: Chem.Mol, suffix='-iso') -> List[Chem.Mol]:
"""
Given a molecule, return a list of molecules where crystallographical ambiguity is resolved.
"""
query: Chem.Mol = create_unelemental_query(mol)
# these include all isomorphisms
to_idx_map = lambda indxs: dict(zip(range(mol.GetNumHeavyAtoms()), indxs)) # noqa
idx_maps = [to_idx_map(qidxs) for qidxs in mol.GetSubstructMatches(query, uniquify=False)]
# these are replacement isomorphism
idx_maps = remove_elemental_isomorphisms(idx_maps, mol)
if len(idx_maps) == 0:
raise ValueError(f'An unexplained error: no replacement_isomorphic mappings - before filtering: {idx_maps}')
elif len(idx_maps) == 1:
return [mol]
else:
return maps_to_mols(idx_maps, mol, suffix)
# dependent functions
def maps_to_mols(idx_maps: List[Dict[int, int]], mol: Chem.Mol, suffix) -> List[Chem.Mol]:
conformer: Chem.Conformer = mol.GetConformer() # noqa
copies = []
name = mol.GetProp('_Name') if mol.HasProp('_Name') else '_'
for mi, mapping in enumerate(idx_maps):
if mi == 0:
# original was spiked to the front of the mappings
copies.append(mol)
continue
copy = Chem.Mol(mol)
coconfomer = copy.GetConformer() # noqa
for q, t in mapping.items():
coconfomer.SetAtomPosition(q, conformer.GetAtomPosition(t))
copy.SetProp('_Name', f'{name}{suffix}{mi}')
copies.append(copy)
return copies
def remove_elemental_isomorphisms(idx_maps: List[Dict[int, int]], mol: Chem.Mol) -> List[Dict[int, int]]:
"""
Benzene can be mapped twelve ways. I do not want this. Only maps where the elements differ
"""
unique = []
seen = []
# spiking in the original for zeroth position
for m in [dict(zip(range(mol.GetNumHeavyAtoms(), mol.GetNumHeavyAtoms())))] + idx_maps: # noqa
# this could be done mathematically, but this is quicker to write
element_hash = ''.join([mol.GetAtomWithIdx(i).GetSymbol() for i in m.values()])
if element_hash in seen:
continue
unique.append(m)
seen.append(element_hash)
return unique
def create_unelemental_query(mol: Chem.Mol) -> Chem.Mol:
"""
A Query mol where the elements are stripped.
Changing the atomic Zahlen to 0, will not work as the atoms have to be QueryAtoms,
which is what `arthorian_quest.enquire` does.
"""
subs = {}
for atom in mol.GetAtoms(): # noqa
if atom.GetSymbol() == 'H':
subs[atom.GetIdx()] = None
elif atom.GetSymbol() not in 'CNO':
continue
elif not atom.GetIsAromatic():
subs[atom.GetIdx()] = '[C,N,O]'
else:
subs[atom.GetIdx()] = 'a'
return enquire(mol, subs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment