greglandrum/SMILES atom regex.ipynb

## SMILES atom regex.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import re",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "atom_finder = re.compile(r'\\[[^\\]]+\\]|[A-Z][a-z]?|[a-z]')\nsmiles = 'C[C@@H](Cl)C(=O)c1c[13cH]ccc1'\nprint(atom_finder.findall(smiles))\n",
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": "['C', '[C@@H]', 'Cl', 'C', 'O', 'c', 'c', '[13cH]', 'c', 'c', 'c']\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ms = [x for x in atom_finder.finditer(smiles)]\n[(x.start(),x.end()) for x in ms]",
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 16,
          "data": {
            "text/plain": "[(0, 1),\n (1, 7),\n (8, 10),\n (11, 12),\n (14, 15),\n (16, 17),\n (18, 19),\n (19, 25),\n (25, 26),\n (26, 27),\n (27, 28)]"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python38264bitrdkitblogconda8e387449f04349d3a22f66dc2550acf5",
      "display_name": "Python 3.8.2 64-bit ('rdkit_blog': conda)",
      "language": "python"
    },
    "toc": {
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": false,
      "base_numbering": 1,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": false
    },
    "language_info": {
      "name": "python",
      "version": "3.9.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "SMILES atom regex.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import re",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "atom_finder = re.compile(r'\\[[^\\]]+\\]\|[A-Z][a-z]?\|[a-z]')\nsmiles = 'C[C@@H](Cl)C(=O)c1c[13cH]ccc1'\nprint(atom_finder.findall(smiles))\n",
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "stream",
	"text": "['C', '[C@@H]', 'Cl', 'C', 'O', 'c', 'c', '[13cH]', 'c', 'c', 'c']\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ms = [x for x in atom_finder.finditer(smiles)]\n[(x.start(),x.end()) for x in ms]",
	"execution_count": 16,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 16,
	"data": {
	"text/plain": "[(0, 1),\n (1, 7),\n (8, 10),\n (11, 12),\n (14, 15),\n (16, 17),\n (18, 19),\n (19, 25),\n (25, 26),\n (26, 27),\n (27, 28)]"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python38264bitrdkitblogconda8e387449f04349d3a22f66dc2550acf5",
	"display_name": "Python 3.8.2 64-bit ('rdkit_blog': conda)",
	"language": "python"
	},
	"toc": {
	"nav_menu": {},
	"number_sections": true,
	"sideBar": true,
	"skip_h1_title": false,
	"base_numbering": 1,
	"title_cell": "Table of Contents",
	"title_sidebar": "Contents",
	"toc_cell": false,
	"toc_position": {},
	"toc_section_display": true,
	"toc_window_display": false
	},
	"language_info": {
	"name": "python",
	"version": "3.9.4",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "SMILES atom regex.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}