Skip to content

Instantly share code, notes, and snippets.

@charnley
Last active August 31, 2020 14:26
Show Gist options
  • Save charnley/09b25cba91238df6a7478f5a1d36cf7c to your computer and use it in GitHub Desktop.
Save charnley/09b25cba91238df6a7478f5a1d36cf7c to your computer and use it in GitHub Desktop.
Hacking SMILES to get correct Hydrogens on molecule fragments with RDkit
import numpy as np
from rdkit import Chem
# examples
# C=[NH+]C
component = "C=[NH+]"
component = "[NH+]C"
# C[S+](C)C
component = "C[S+]"
print(component)
mc = Chem.MolFromSmiles(component)
n_atoms = mc.GetNumAtoms()
n_bonds = len(mc.GetBonds())
charges = np.zeros(n_atoms, dtype=int)
for idx in range(n_atoms):
atom = mc.GetAtomWithIdx(idx)
charges[idx] = atom.GetFormalCharge()
atom.SetNumExplicitHs(0)
atom.SetFormalCharge(0)
component = Chem.MolToSmiles(mc, canonical=False)
component = component.replace("[", "").replace("]","")
mc = Chem.MolFromSmiles(component)
for idx, charge in zip(range(n_atoms), charges):
atom = mc.GetAtomWithIdx(idx)
atom.SetFormalCharge(charge)
component = Chem.MolToSmiles(mc)
print(component)
@greglandrum
Copy link

Here's an alternative that uses RDKit functionality and doesn't rely on SMILES hacking:

In [15]: def adjustMolHs(mol):
    ...:     organicSubset = (5, 6, 7, 8, 9, 15, 16, 17, 35, 53)
    ...:     for at in mol.GetAtoms():
    ...:         if at.GetAtomicNum() not in organicSubset:
    ...:             continue
    ...:         at.SetNoImplicit(False)
    ...:         at.SetNumExplicitHs(0)
    ...:         at.SetNumRadicalElectrons(0)
    ...:     Chem.SanitizeMol(mol)
    ...:     

In [16]: m = Chem.MolFromSmiles('C=[NH+]')

In [17]: adjustMolHs(m)

In [18]: Chem.MolToSmiles(m)
Out[18]: 'C=[NH2+]'

In [19]: m2 = Chem.MolFromSmiles('[NH+]C')

In [20]: adjustMolHs(m2)

In [21]: Chem.MolToSmiles(m2)
Out[21]: 'C[NH3+]'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment