Created
August 4, 2018 11:37
-
-
Save sshojiro/c156c351fbc4e05e478a6acc1b7d4949 to your computer and use it in GitHub Desktop.
Count molecular fragments using RDKit in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Count Fragments\n", | |
"\n", | |
"Count fragments raised in MACCS key list." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit import Chem\n", | |
"from rdkit.Chem.Draw import IPythonConsole\n", | |
"from rdkit.Chem import MACCSkeys\n", | |
"\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"FIXME:\n", | |
"This code ignore the following types of fragments in MACCS key\n", | |
"\n", | |
"> 1, ISOTOPE\n", | |
">\n", | |
"> 125, Aromatic Ring > 1\n", | |
">\n", | |
"> 166, fragments\n", | |
">\n", | |
"> c.f. https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 1., 0., 0., 0., 0., 0., 0., 0., 0., 4., 0., 0., 6.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 4., 0., 3., 0.,\n", | |
" 2., 3., 0., 0., 0., 0., 2., 6., 0., 0., 0., 0., 0.,\n", | |
" 1., 1., 0., 0., 0., 0., 3., 0., 5., 0., 0., 0., 0.,\n", | |
" 0., 1., 0., 0., 0., 1., 2., 0., 0., 0., 2., 1., 1.,\n", | |
" 2., 0., 5., 5., 2., 0., 0., 0., 0., 0., 2., 7., 0.,\n", | |
" 2., 0., 6., 0., 6., 1., 5., 5., 0., 1., 2., 7., 0.,\n", | |
" 1., 5., 1., 2., 2., 2., 1., 2., 0., 5., 1., 1., 5.,\n", | |
" 0., 10., 1., 2., 7., 24., 5., 1., 30., 0.])" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"smi = \"CN1CCN(CC2=CC=C(C=C2)C(=O)NC2=CC(NC3=NC=CC(=N3)C3=CN=CC=C3)=C(C)C=C2)CC1\"\n", | |
"mol = Chem.MolFromSmiles(smi)\n", | |
"\n", | |
"\n", | |
"fingerprint_maccs167 = {str(k-1):v[0] for k,v in MACCSkeys.smartsPatts.items()}\n", | |
"\n", | |
"list_fragment_counts = np.zeros(len(fingerprint_maccs167))\n", | |
"for idx_str, smarts in fingerprint_maccs167.items():\n", | |
" if smarts is not '?':\n", | |
"\n", | |
" substruct = Chem.MolFromSmarts(smarts)\n", | |
" list_fragment_counts[int(idx_str)] = len(mol.GetSubstructMatches(substruct))\n", | |
"\n", | |
"list_fragment_counts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAF8klEQVR4nO3d0XKjOBBAUbM1///L7AMVhjHGYLWIW9I5T042w7pMfCMwSNM8zw8ASv337ScA0DYZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVG6Mk3Tt58Cw/nz7ScAdSwBned5fVBxm/FN0bHJrwitexm7YAEFlOtklIadxm6aPv4NF1A+JaO06mUi99+8nkUBpYyM0p6Co/j3w1IBJUJGaUnkNOgdp1DhIaPUt15yVPVXq1YE7/hAn8HJKFVN0996bh9Ht1p+xvPiBqGY60ZpjFEk2biLiZYso8jg1aBQl4zSEoNQEnJQT1XzfNNHTJCW0Sg3mOd/egpdk1GAEBkFCJFRgBAZBQiRUYAQGaWyafcA+iajACEySk3rlB83zf3hbk4SklFaatMywV1DT5gRuBl0aPetplnR0wYjo1235HMHsy4O6u7VNGt1eXuW4GmDxf8LU+1Rl4wO52WP3nxZts3N98t/xfbzPgdjekfoQUYHcr1BdVfTXA7KK/6ivZwM/+w53BJ6eMjoIMoGcXVX06wb07p/EqqHnqHIaOcqHgWXbWG3wd8bln439IxDRrt1x2cyCc8nJg89I5DRbkXGZacbzCZz6Ome60b7lzB/+2WYg2PAW2+dgvfcxdS/+CC01jP5d7P1t1mxoduntzx25xRHZJTv2K/VNE25UpXqyZCZjJLFsg5eHvlDTxLOjfI1zS0emqry5GE0yjclD1NzoecrZJQv2NZzeZy2p2mfGHnIKLyw1HN7X5OeckRGAUJkFCBERgFCZBQgREb7ZwG4AC8d52S0f1bThFvJ6BDmeV4nkSv4t9WfD/RERju0jD33g9Dikj6cGYBj7qnvytOkxfslMAsmNjYRMrxnmttOnMauYBXlwQO6DsCHfQW4SEZ7cHHW96csvqmkgD6uvVDwkNHWFbzD3w9LB0/GND2O3hRWKOGI34xWxRa/rLyaZgderq68+xvz4mdARttTq3dW01wcBfRxeMbjxc8zMhltTPVDS8eqe1eWKbWcPSsXPI1OQ7eujzS9bKxcfs9A9ssmb79cBpj6yKdklLEc3Y1VFtA3XWYcMspYqq9Sp57IaHMcc+Zi9VBklOEIH3XJKCOq+zmSLg9ORhnIftnkultmTDIKhdYr8N3XNDgZbYz3KmQjowAhMgoQIqMMyMfq1CSjUGzaPWBEMtoMt29DTjLaEvWsy6rRVCGjLXG3TF3LzP9iSpBpm1M7va7bHOxBy6zVZcuomPGahQUkktoH9OieGSUtcLSo36dvh5vWs7KyS1vsrYxelnH7zacfcDNimX37rtfwptVVrTPYIhnNJbhssr1ZYP+6vd8Lp/uobEdY9bpd3nhZVHnDeNeVuX6MfzGRH+2Il4PiN1+Sjd2TQt33iZiWeb8XCl7V0936ZptP/8k+zUxGU7D6fBLVj6yP/u3FbRqWNsEFT/DX0/VP8THg/oKqj7b59Bx89JSTjMKztXe1ahW/OnV9MpFNcRN3McFrd0SqeJvrOHT98uk7fJGMwm8ry59bV9OSUWiGQWhOMgqNcUo0GxkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZTaHufX7ucoHfZIanLKrMKmTinxGYdTQbGc3l547pj98m0/QwCXcrineTP5M5yWg6P8PS5fGlf/KzUKh3V7f8mczMjkntdA16Syu3peB4/GgXG5nmIaMNO40sCX24aKiANkBG4QsK/gQux/UPAc1HRtuwfdcZhPbh+gkZp26S8xFTM9SzM8vePE2k/Z6fy++bMc8Pl9X3Z55PKqmh+ckoQIiMtsSAtG/bnWtHN0RGG+MQr2/q2SIZbcPFjyNonQOOFskoQIiMQi4GpM2RUUjHeZu2yChksa2nkjZERgFCZLQtzplBOjIKECKjACEyChAiowAhMtoSc2xDQjIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAhMgoQIqMAITIKECKjACEyChAiowAh/wMPmcnPuncvwgAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<rdkit.Chem.rdchem.Mol at 0x2322841f530>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mol" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment