Skip to content

Instantly share code, notes, and snippets.

@sshojiro
Created August 4, 2018 11:37
Show Gist options
  • Save sshojiro/c156c351fbc4e05e478a6acc1b7d4949 to your computer and use it in GitHub Desktop.
Save sshojiro/c156c351fbc4e05e478a6acc1b7d4949 to your computer and use it in GitHub Desktop.
Count molecular fragments using RDKit in Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Count Fragments\n",
"\n",
"Count how many times each fragment defined in the MACCS key list occurs in a molecule."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"from rdkit.Chem import MACCSkeys\n",
"\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"FIXME:\n",
"This code ignores the following MACCS keys, which have no SMARTS pattern in RDKit and are therefore left at a count of 0:\n",
"\n",
"> 1, ISOTOPE\n",
">\n",
"> 125, Aromatic Ring > 1\n",
">\n",
"> 166, fragments\n",
">\n",
"> c.f. https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 1., 0., 0., 0., 0., 0., 0., 0., 0., 4., 0., 0., 6.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 4., 0., 3., 0.,\n",
" 2., 3., 0., 0., 0., 0., 2., 6., 0., 0., 0., 0., 0.,\n",
" 1., 1., 0., 0., 0., 0., 3., 0., 5., 0., 0., 0., 0.,\n",
" 0., 1., 0., 0., 0., 1., 2., 0., 0., 0., 2., 1., 1.,\n",
" 2., 0., 5., 5., 2., 0., 0., 0., 0., 0., 2., 7., 0.,\n",
" 2., 0., 6., 0., 6., 1., 5., 5., 0., 1., 2., 7., 0.,\n",
" 1., 5., 1., 2., 2., 2., 1., 2., 0., 5., 1., 1., 5.,\n",
" 0., 10., 1., 2., 7., 24., 5., 1., 30., 0.])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"smi = \"CN1CCN(CC2=CC=C(C=C2)C(=O)NC2=CC(NC3=NC=CC(=N3)C3=CN=CC=C3)=C(C)C=C2)CC1\"\n",
"mol = Chem.MolFromSmiles(smi)\n",
"\n",
"\n",
"fingerprint_maccs167 = {str(k-1):v[0] for k,v in MACCSkeys.smartsPatts.items()}\n",
"\n",
"list_fragment_counts = np.zeros(len(fingerprint_maccs167))\n",
"for idx_str, smarts in fingerprint_maccs167.items():\n",
" if smarts is not '?':\n",
"\n",
" substruct = Chem.MolFromSmarts(smarts)\n",
" list_fragment_counts[int(idx_str)] = len(mol.GetSubstructMatches(substruct))\n",
"\n",
"list_fragment_counts"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAF8klEQVR4nO3d0XKjOBBAUbM1///L7AMVhjHGYLWIW9I5T042w7pMfCMwSNM8zw8ASv337ScA0DYZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVG6Mk3Tt58Cw/nz7ScAdSwBned5fVBxm/FN0bHJrwitexm7YAEFlOtklIadxm6aPv4NF1A+JaO06mUi99+8nkUBpYyM0p6Co/j3w1IBJUJGaUnkNOgdp1DhIaPUt15yVPVXq1YE7/hAn8HJKFVN0996bh9Ht1p+xvPiBqGY60ZpjFEk2biLiZYso8jg1aBQl4zSEoNQEnJQT1XzfNNHTJCW0Sg3mOd/egpdk1GAEBkFCJFRgBAZBQiRUYAQGaWyafcA+iajACEySk3rlB83zf3hbk4SklFaatMywV1DT5gRuBl0aPetplnR0wYjo1235HMHsy4O6u7VNGt1eXuW4GmDxf8LU+1Rl4wO52WP3nxZts3N98t/xfbzPgdjekfoQUYHcr1BdVfTXA7KK/6ivZwM/+w53BJ6eMjoIMoGcXVX06wb07p/EqqHnqHIaOcqHgWXbWG3wd8bln439IxDRrt1x2cyCc8nJg89I5DRbkXGZacbzCZz6Ome60b7lzB/+2WYg2PAW2+dgvfcxdS/+CC01jP5d7P1t1mxoduntzx25xRHZJTv2K/VNE25UpXqyZCZjJLFsg5eHvlDTxLOjfI1zS0emqry5GE0yjclD1NzoecrZJQv2NZzeZy2p2mfGHnIKLyw1HN7X5OeckRGAUJkFCBERgFCZBQgREb7ZwG4AC8d52S0f1bThFvJ6BDmeV4nkSv4t9WfD/RERju0jD33g9Dikj6cGYBj7qnvytOkxfslMAsmNjYRMrxnmttOnMauYBXlwQO6DsCHfQW4SEZ7cHHW96csvqmkgD6uvVDwkNHWFbzD3w9LB0/GND2O3hRWKOGI34xWxRa/rLyaZgderq68+xvz4mdARttTq3dW01wcBfRxeMbjxc8zMhltTPVDS8eqe1eWKbWcPSsXPI1OQ7eujzS9bKxcfs9A9ssmb79cBpj6yKdklLEc3Y1VFtA3XWYcMspYqq9Sp57IaHMcc+Zi9VBklOEIH3XJKCOq+zmSLg9ORhnIftnkultmTDIKhdYr8N3XNDgZbYz3KmQjowAhMgoQIqMMyMfq1CSjUGzaPWBEMtoMt29DTjLaEvWsy6rRVCGjLXG3TF3LzP9iSpBpm1M7va7bHOxBy6zVZcuomPGahQUkktoH9OieGSUtcLSo36dvh5vWs7KyS1vsrYxelnH7zacfcDNimX37rtfwptVVrTPYIhnNJbhssr1ZYP+6vd8Lp/uobEdY9bpd3nhZVHnDeNeVuX6MfzGRH+2Il4PiN1+Sjd2TQt33iZiWeb8XCl7V0936ZptP/8k+zUxGU7D6fBLVj6yP/u3FbRqWNsEFT/DX0/VP8THg/oKqj7b59Bx89JSTjMKztXe1ahW/OnV9MpFNcRN3McFrd0SqeJvrOHT98uk7fJGMwm8ry59bV9OSUWiGQWhOMgqNcUo0GxkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZTaHufX7ucoHfZIanLKrMKmTinxGYdTQbGc3l547pj98m0/QwCXcrineTP5M5yWg6P8PS5fGlf/KzUKh3V7f8mczMjkntdA16Syu3peB4/GgXG5nmIaMNO40sCX24aKiANkBG4QsK/gQux/UPAc1HRtuwfdcZhPbh+gkZp26S8xFTM9SzM8vePE2k/Z6fy++bMc8Pl9X3Z55PKqmh+ckoQIiMtsSAtG/bnWtHN0RGG+MQr2/q2SIZbcPFjyNonQOOFskoQIiMQi4GpM2RUUjHeZu2yChksa2nkjZERgFCZLQtzplBOjIKECKjACEyCh
AiowAhMtoSc2xDQjIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAh/wMPmcnPuncvwgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x2322841f530>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mol"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment