Skip to content

Instantly share code, notes, and snippets.

@greglandrum
Last active January 10, 2020 00:07
Show Gist options
  • Save greglandrum/d31ae7618cc5b7322a7121a529bf8190 to your computer and use it in GitHub Desktop.
Save greglandrum/d31ae7618cc5b7322a7121a529bf8190 to your computer and use it in GitHub Desktop.
Kramer_AtomInvariants.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from rdkit import Chem\nfrom rdkit.Chem import rdMolDescriptors\nimport rdkit\nprint(rdkit.__version__)",
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": "2019.09.2\n",
"name": "stdout"
},
{
"output_type": "stream",
"text": "RDKit WARNING: [13:43:33] Enabling RDKit 2019.09.2 jupyter extensions\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "phenyl = Chem.MolFromSmiles(\"[*:1]c1ccccc1\")\ncyclohexyl = Chem.MolFromSmiles(\"[*:1]C1=CCCCC1\")\ntolyl = Chem.MolFromSmiles(\"[*:1]c1ccccc1C\")",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "print(rdMolDescriptors.GetMorganFingerprint(phenyl,0,fromAtoms=[0]).GetNonzeroElements())\nprint(rdMolDescriptors.GetMorganFingerprint(cyclohexyl,0,fromAtoms=[0]).GetNonzeroElements())\nprint(rdMolDescriptors.GetMorganFingerprint(tolyl,0,fromAtoms=[0]).GetNonzeroElements())\n",
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": "{2342113506: 1}\n{2342113506: 1}\n{2342113506: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def get_simple_morgan(m,radius,includeAromaticity=False,**kwargs):\n if not includeAromaticity:\n invars = [x.GetAtomicNum() for x in m.GetAtoms()]\n else:\n invars = [x.GetAtomicNum()|(1000+x.GetIsAromatic()) for x in m.GetAtoms()]\n return rdMolDescriptors.GetMorganFingerprint(m,radius,invariants=invars,**kwargs)",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "print(get_simple_morgan(phenyl,1,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(cyclohexyl,1,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(tolyl,1,fromAtoms=[0]).GetNonzeroElements())\n",
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"text": "{0: 1, 3205496007: 1}\n{0: 1, 3205496007: 1}\n{0: 1, 3205496007: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "print(get_simple_morgan(phenyl,1,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(cyclohexyl,1,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(tolyl,1,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())",
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": "{1000: 1, 3205368108: 1}\n{1000: 1, 3205368113: 1}\n{1000: 1, 3205368108: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "print(get_simple_morgan(phenyl,2,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(cyclohexyl,2,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())\nprint(get_simple_morgan(tolyl,2,includeAromaticity=True,fromAtoms=[0]).GetNonzeroElements())",
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": "{1000: 1, 1524513631: 1, 3205368108: 1}\n{1000: 1, 1525144991: 1, 3205368113: 1}\n{1000: 1, 1524513631: 1, 3205368108: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import hashlib\ndef get_smiles_morgan(m,radius,**kwargs):\n smis = [Chem.Atom.GetSmarts(x) for x in m.GetAtoms()]\n invars = []\n for x in m.GetAtoms():\n # there's almost certainly a more performant way to do this, but....\n h = hashlib.md5()\n h.update(x.GetSmarts().encode())\n invars.append(int.from_bytes(h.digest()[:4],'little'))\n return rdMolDescriptors.GetMorganFingerprint(m,radius,invariants=invars,**kwargs)",
"execution_count": 32,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "print(get_smiles_morgan(phenyl,2,fromAtoms=[0]).GetNonzeroElements())\nprint(get_smiles_morgan(cyclohexyl,2,fromAtoms=[0]).GetNonzeroElements())\nprint(get_smiles_morgan(tolyl,2,fromAtoms=[0]).GetNonzeroElements())",
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"text": "{1916351442: 1, 2355797165: 1, 3887229975: 1}\n{1594197318: 1, 1916351442: 1, 3294283874: 1}\n{1916351442: 1, 2355797165: 1, 3887229975: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Note that this is sensitive to things like atom map numbers:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "phenyl2 = Chem.MolFromSmiles(\"[*:2]c1ccccc1\")\nprint(get_smiles_morgan(phenyl2,2,fromAtoms=[0]).GetNonzeroElements())",
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"text": "{1272349123: 1, 1695625860: 1, 2878854048: 1}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.5",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "d31ae7618cc5b7322a7121a529bf8190",
"data": {
"description": "Kramer_AtomInvariants.ipynb",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/d31ae7618cc5b7322a7121a529bf8190"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment