Created
March 5, 2019 21:46
-
-
Save yamasakih/d423cb2a387c397112eb66a64202965c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"from rdkit import Chem\n", | |
"from rdkit.Chem import Descriptors\n", | |
"from rdkit.ML.Descriptors import MoleculeDescriptors\n", | |
"\n", | |
"\n", | |
"def calculate_descriptors(mols, names=None, ipc_avg=False):\n", | |
" if names is None:\n", | |
" names = [d[0] for d in Descriptors._descList]\n", | |
" calc = MoleculeDescriptors.MolecularDescriptorCalculator(names)\n", | |
" descs = [calc.CalcDescriptors(mol) for mol in mols]\n", | |
" descs = pd.DataFrame(descs, columns=names)\n", | |
" if 'Ipc' in names and ipc_avg:\n", | |
" descs['Ipc'] = [Descriptors.Ipc(mol, avg=True) for mol in mols] \n", | |
" return descs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(s) for s in ['c1ccccc1', 'C1CCCCC1', 'CCO']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>MaxEStateIndex</th>\n", | |
" <th>MinEStateIndex</th>\n", | |
" <th>MaxAbsEStateIndex</th>\n", | |
" <th>MinAbsEStateIndex</th>\n", | |
" <th>qed</th>\n", | |
" <th>MolWt</th>\n", | |
" <th>HeavyAtomMolWt</th>\n", | |
" <th>ExactMolWt</th>\n", | |
" <th>NumValenceElectrons</th>\n", | |
" <th>NumRadicalElectrons</th>\n", | |
" <th>...</th>\n", | |
" <th>fr_sulfide</th>\n", | |
" <th>fr_sulfonamd</th>\n", | |
" <th>fr_sulfone</th>\n", | |
" <th>fr_term_acetylene</th>\n", | |
" <th>fr_tetrazole</th>\n", | |
" <th>fr_thiazole</th>\n", | |
" <th>fr_thiocyan</th>\n", | |
" <th>fr_thiophene</th>\n", | |
" <th>fr_unbrch_alkane</th>\n", | |
" <th>fr_urea</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2.000000</td>\n", | |
" <td>2.00</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>2.00</td>\n", | |
" <td>0.442628</td>\n", | |
" <td>78.114</td>\n", | |
" <td>72.066</td>\n", | |
" <td>78.046950</td>\n", | |
" <td>30</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.500000</td>\n", | |
" <td>1.50</td>\n", | |
" <td>1.500000</td>\n", | |
" <td>1.50</td>\n", | |
" <td>0.422316</td>\n", | |
" <td>84.162</td>\n", | |
" <td>72.066</td>\n", | |
" <td>84.093900</td>\n", | |
" <td>36</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7.569444</td>\n", | |
" <td>0.25</td>\n", | |
" <td>7.569444</td>\n", | |
" <td>0.25</td>\n", | |
" <td>0.406808</td>\n", | |
" <td>46.069</td>\n", | |
" <td>40.021</td>\n", | |
" <td>46.041865</td>\n", | |
" <td>20</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>3 rows × 200 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" MaxEStateIndex MinEStateIndex MaxAbsEStateIndex MinAbsEStateIndex \\\n", | |
"0 2.000000 2.00 2.000000 2.00 \n", | |
"1 1.500000 1.50 1.500000 1.50 \n", | |
"2 7.569444 0.25 7.569444 0.25 \n", | |
"\n", | |
" qed MolWt HeavyAtomMolWt ExactMolWt NumValenceElectrons \\\n", | |
"0 0.442628 78.114 72.066 78.046950 30 \n", | |
"1 0.422316 84.162 72.066 84.093900 36 \n", | |
"2 0.406808 46.069 40.021 46.041865 20 \n", | |
"\n", | |
" NumRadicalElectrons ... fr_sulfide fr_sulfonamd fr_sulfone \\\n", | |
"0 0 ... 0 0 0 \n", | |
"1 0 ... 0 0 0 \n", | |
"2 0 ... 0 0 0 \n", | |
"\n", | |
" fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene \\\n", | |
"0 0 0 0 0 0 \n", | |
"1 0 0 0 0 0 \n", | |
"2 0 0 0 0 0 \n", | |
"\n", | |
" fr_unbrch_alkane fr_urea \n", | |
"0 0 0 \n", | |
"1 0 0 \n", | |
"2 0 0 \n", | |
"\n", | |
"[3 rows x 200 columns]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"calculate_descriptors(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Ipc</th>\n", | |
" <th>MolWt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>34.399462</td>\n", | |
" <td>78.114</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>34.399462</td>\n", | |
" <td>84.162</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2.754888</td>\n", | |
" <td>46.069</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Ipc MolWt\n", | |
"0 34.399462 78.114\n", | |
"1 34.399462 84.162\n", | |
"2 2.754888 46.069" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"calculate_descriptors(mols, ['Ipc', 'MolWt'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Ipc</th>\n", | |
" <th>MolWt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.719973</td>\n", | |
" <td>78.114</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.719973</td>\n", | |
" <td>84.162</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.918296</td>\n", | |
" <td>46.069</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Ipc MolWt\n", | |
"0 1.719973 78.114\n", | |
"1 1.719973 84.162\n", | |
"2 0.918296 46.069" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"calculate_descriptors(mols, ['Ipc', 'MolWt'], ipc_avg=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# EOF " | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment