Last active
August 31, 2020 14:26
-
-
Save charnley/09b25cba91238df6a7478f5a1d36cf7c to your computer and use it in GitHub Desktop.
Hacking SMILES to get correct Hydrogens on molecule fragments with RDkit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-derive the hydrogen counts of a molecule *fragment* given as SMILES.
#
# Approach (a deliberate SMILES hack):
#   1. parse the fragment and remember every atom's formal charge,
#   2. clear the charges and the explicit hydrogen counts,
#   3. write the SMILES back, strip the square brackets and re-parse it, so
#      that the atoms are read as plain (unbracketed) atoms again,
#   4. restore the saved formal charges and print the resulting SMILES.
import numpy as np
from rdkit import Chem

# Example fragments, listed as parent molecule -> fragment(s):
#   C=[NH+]C  -> "C=[NH+]" or "[NH+]C"
#   C[S+](C)C -> "C[S+]"
# Only one fragment is processed per run; edit the assignment to try another.
component = "C[S+]"
print(component)

mc = Chem.MolFromSmiles(component)
if mc is None:
    # MolFromSmiles reports a parse failure by returning None, not by raising.
    raise ValueError("could not parse SMILES: {!r}".format(component))

n_atoms = mc.GetNumAtoms()

# Save the formal charges, then neutralise each atom and zero its explicit
# hydrogen count before the SMILES is rewritten.
charges = np.zeros(n_atoms, dtype=int)
for idx, atom in enumerate(mc.GetAtoms()):
    charges[idx] = atom.GetFormalCharge()
    atom.SetNumExplicitHs(0)
    atom.SetFormalCharge(0)

# NOTE(review): the charges are restored by atom index below, which assumes
# the non-canonical SMILES lists the atoms in their original index order.
# Confirm this for branched or ring-containing fragments.
component = Chem.MolToSmiles(mc, canonical=False)

# The hack itself: drop the brackets so every atom is re-read as a plain atom.
# Atoms that are only valid inside brackets will not survive this step.
component = component.replace("[", "").replace("]", "")

mc = Chem.MolFromSmiles(component)
if mc is None:
    raise ValueError(
        "bracket-stripped SMILES could not be parsed: {!r}".format(component)
    )
if mc.GetNumAtoms() != n_atoms:
    # The saved charges can only be mapped back when the atoms still match up.
    raise ValueError(
        "atom count changed after stripping brackets: {!r}".format(component)
    )

# tolist() yields plain Python ints; numpy integer scalars may be rejected by
# RDKit's Python bindings.
for idx, charge in enumerate(charges.tolist()):
    mc.GetAtomWithIdx(idx).SetFormalCharge(charge)

component = Chem.MolToSmiles(mc)
print(component)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's an alternative that uses RDKit functionality and doesn't rely on SMILES hacking: