Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save avrilcoghlan/531acff5f2f5860d5fc6631f549aec43 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/531acff5f2f5860d5fc6631f549aec43 to your computer and use it in GitHub Desktop.
Python notebook to query ChEMBL, to retrieve compounds with bioactivities for certain targets, and obtain properties of those compounds
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"###############################\n",
"#This cell imports relevant python modules:\n",
"###############################\n",
"import pandas as pd #Use pandas python module to view and analyse data\n",
"import requests #This is used to access json files!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Using a list of known targets, find compounds that are active on these targets:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is the url string that calls the 'Activities' API with the initial query specification:\n",
"https://www.ebi.ac.uk/chembl/api/data/activity.json?target_chembl_id__in=CHEMBL1848,CHEMBL3394&assay_type=B&pchembl_value__gte=5&limit=100\n",
"\n",
"These are the available columns for the Activities API:\n",
"Index(['activity_comment', 'activity_id', 'activity_properties',\n",
" 'assay_chembl_id', 'assay_description', 'assay_type', 'bao_endpoint',\n",
" 'bao_format', 'bao_label', 'canonical_smiles', 'data_validity_comment',\n",
" 'data_validity_description', 'document_chembl_id', 'document_journal',\n",
" 'document_year', 'ligand_efficiency', 'molecule_chembl_id',\n",
" 'molecule_pref_name', 'parent_molecule_chembl_id', 'pchembl_value',\n",
" 'potential_duplicate', 'qudt_units', 'record_id', 'relation', 'src_id',\n",
" 'standard_flag', 'standard_relation', 'standard_text_value',\n",
" 'standard_type', 'standard_units', 'standard_upper_value',\n",
" 'standard_value', 'target_chembl_id', 'target_organism',\n",
" 'target_pref_name', 'target_tax_id', 'text_value', 'toid', 'type',\n",
" 'units', 'uo_units', 'upper_value', 'value'],\n",
" dtype='object')\n"
]
}
],
"source": [
"###############################\n",
"#Search for activity for a list of targets:\n",
"###############################\n",
"\n",
"#Specify the input parameters: \n",
"targets = ['CHEMBL1848', 'CHEMBL3394'] #These targets come from the Supplementary Table 21 excel spreadsheet\n",
"targets = \",\".join(targets) #Join the targets into a suitable string to fulfil the search conditions of the API\n",
"assay_type = 'B' #Only look for Binding Assays\n",
"pchembl_value = 5 #Specify a minimum threshold of the pChEMBL activity value. Greater than or equal to 5 (i.e. 10 uM or more potent) is a typical minimum rule of thumb for binding activity between a compound and a protein target\n",
"limit = 100 #Limit the number of records pulled back for each url call\n",
"\n",
"###############################\n",
"#Set up the call to the ChEMBL 'activity' API\n",
"#Remember that there is a limit to the number of records returned in any one API call (default is 20 records, maximum is 1000 records)\n",
"#So need to iterate over several pages of records to gather all relevant information together!\n",
"url_stem = \"https://www.ebi.ac.uk\" #This is the stem of the url\n",
"#The pChEMBL threshold is taken from the 'pchembl_value' parameter above (it was previously hard-coded in the url, so changing the parameter had no effect)\n",
"url_full_string = url_stem + \"/chembl/api/data/activity.json?target_chembl_id__in={}&assay_type={}&pchembl_value__gte={}&limit={}\".format(targets, assay_type, pchembl_value, limit) #This is the full url with the specified input parameters\n",
"response = requests.get( url_full_string ) #This calls the information back from the API using the 'requests' module\n",
"response.raise_for_status() #Stop with a clear HTTP error if the call failed, rather than an obscure JSON/KeyError further down\n",
"url_full = response.json() #Convert the response to json format\n",
"url_activities = url_full['activities'] #This is a list of the results for activities\n",
"\n",
"#This 'while' loop iterates over several pages of records (if required), and collates the list of results\n",
"#'page_meta' -> 'next' holds the relative url of the next page, and is empty once the last page has been reached\n",
"while url_full['page_meta']['next']:\n",
"    response = requests.get(url_stem + url_full['page_meta']['next'])\n",
"    response.raise_for_status()\n",
"    url_full = response.json()\n",
"    url_activities.extend(url_full['activities']) #Append this page of results to the results collected so far\n",
"\n",
"#Convert the list of results into a Pandas dataframe:\n",
"act_df = pd.DataFrame(url_activities)\n",
"\n",
"#Print out some useful information:\n",
"print(\"This is the url string that calls the 'Activities' API with the initial query specification:\\n{}\".format(url_full_string) )\n",
"print(\"\\nThese are the available columns for the Activities API:\\n{}\".format(act_df.columns))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retain only relevant columns from the Activities API:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_chembl_id</th>\n",
" <th>target_organism</th>\n",
" <th>target_pref_name</th>\n",
" <th>parent_molecule_chembl_id</th>\n",
" <th>molecule_chembl_id</th>\n",
" <th>molecule_pref_name</th>\n",
" <th>pchembl_value</th>\n",
" <th>standard_type</th>\n",
" <th>standard_relation</th>\n",
" <th>standard_value</th>\n",
" <th>standard_units</th>\n",
" <th>assay_chembl_id</th>\n",
" <th>document_chembl_id</th>\n",
" <th>src_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL67</td>\n",
" <td>CHEMBL67</td>\n",
" <td>COMBRETASTATIN A4</td>\n",
" <td>6.75</td>\n",
" <td>Ki</td>\n",
" <td>=</td>\n",
" <td>180</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817537</td>\n",
" <td>CHEMBL1135048</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL107</td>\n",
" <td>CHEMBL107</td>\n",
" <td>COLCHICINE</td>\n",
" <td>6.11</td>\n",
" <td>Ki</td>\n",
" <td>=</td>\n",
" <td>780</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817537</td>\n",
" <td>CHEMBL1135048</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL143850</td>\n",
" <td>CHEMBL143850</td>\n",
" <td>None</td>\n",
" <td>6.2</td>\n",
" <td>Ki</td>\n",
" <td>=</td>\n",
" <td>630</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817537</td>\n",
" <td>CHEMBL1135048</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL143680</td>\n",
" <td>CHEMBL143680</td>\n",
" <td>None</td>\n",
" <td>5.43</td>\n",
" <td>Ki</td>\n",
" <td>=</td>\n",
" <td>3690</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817537</td>\n",
" <td>CHEMBL1135048</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL136780</td>\n",
" <td>CHEMBL136780</td>\n",
" <td>None</td>\n",
" <td>5.47</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3400</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817535</td>\n",
" <td>CHEMBL1135048</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL436000</td>\n",
" <td>CHEMBL436000</td>\n",
" <td>None</td>\n",
" <td>5.14</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>7200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL820774</td>\n",
" <td>CHEMBL1130240</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL156658</td>\n",
" <td>CHEMBL156658</td>\n",
" <td>None</td>\n",
" <td>5.07</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>8600</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817536</td>\n",
" <td>CHEMBL1151998</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL107</td>\n",
" <td>CHEMBL107</td>\n",
" <td>COLCHICINE</td>\n",
" <td>5.46</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3500</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817536</td>\n",
" <td>CHEMBL1151998</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL79720</td>\n",
" <td>CHEMBL79720</td>\n",
" <td>None</td>\n",
" <td>5.6</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2500</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817536</td>\n",
" <td>CHEMBL1151998</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL67</td>\n",
" <td>CHEMBL67</td>\n",
" <td>COMBRETASTATIN A4</td>\n",
" <td>5.54</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2900</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL817536</td>\n",
" <td>CHEMBL1151998</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627442</td>\n",
" <td>CHEMBL1627442</td>\n",
" <td>None</td>\n",
" <td>6.04</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>910</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627436</td>\n",
" <td>CHEMBL1627436</td>\n",
" <td>None</td>\n",
" <td>5.03</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>9400</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627445</td>\n",
" <td>CHEMBL1627445</td>\n",
" <td>None</td>\n",
" <td>5.52</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL265926</td>\n",
" <td>CHEMBL265926</td>\n",
" <td>None</td>\n",
" <td>5.32</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4800</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL189905</td>\n",
" <td>CHEMBL189905</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>10000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627439</td>\n",
" <td>CHEMBL1627439</td>\n",
" <td>None</td>\n",
" <td>5.11</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>7700</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627838</td>\n",
" <td>CHEMBL1627838</td>\n",
" <td>None</td>\n",
" <td>5.07</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>8600</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL299613</td>\n",
" <td>CHEMBL299613</td>\n",
" <td>2-METHOXYESTRADIOL</td>\n",
" <td>5.54</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2900</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1628145</td>\n",
" <td>CHEMBL1628145</td>\n",
" <td>None</td>\n",
" <td>5.62</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2400</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627441</td>\n",
" <td>CHEMBL1627441</td>\n",
" <td>None</td>\n",
" <td>5.32</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4800</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627438</td>\n",
" <td>CHEMBL1627438</td>\n",
" <td>None</td>\n",
" <td>5.38</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL1627439</td>\n",
" <td>CHEMBL1627439</td>\n",
" <td>None</td>\n",
" <td>5.96</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>1100</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL389126</td>\n",
" <td>CHEMBL389126</td>\n",
" <td>None</td>\n",
" <td>5.31</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4900</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL815873</td>\n",
" <td>CHEMBL1128390</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL137829</td>\n",
" <td>CHEMBL137829</td>\n",
" <td>None</td>\n",
" <td>5.5</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL138698</td>\n",
" <td>CHEMBL138698</td>\n",
" <td>None</td>\n",
" <td>5.16</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>6900</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL137924</td>\n",
" <td>CHEMBL137924</td>\n",
" <td>None</td>\n",
" <td>5.44</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3600</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL440529</td>\n",
" <td>CHEMBL440529</td>\n",
" <td>None</td>\n",
" <td>5.38</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL141271</td>\n",
" <td>CHEMBL141271</td>\n",
" <td>None</td>\n",
" <td>5.35</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4500</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL142135</td>\n",
" <td>CHEMBL142135</td>\n",
" <td>None</td>\n",
" <td>5.28</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>5300</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL422643</td>\n",
" <td>CHEMBL422643</td>\n",
" <td>None</td>\n",
" <td>5.24</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>5700</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL140774</td>\n",
" <td>CHEMBL140774</td>\n",
" <td>None</td>\n",
" <td>5.5</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL122397</td>\n",
" <td>CHEMBL122397</td>\n",
" <td>None</td>\n",
" <td>5.44</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3600</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL344614</td>\n",
" <td>CHEMBL344614</td>\n",
" <td>None</td>\n",
" <td>5.31</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4900</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL137897</td>\n",
" <td>CHEMBL137897</td>\n",
" <td>None</td>\n",
" <td>5.44</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3600</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL44918</td>\n",
" <td>CHEMBL44918</td>\n",
" <td>None</td>\n",
" <td>5.4</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL341946</td>\n",
" <td>CHEMBL341946</td>\n",
" <td>None</td>\n",
" <td>5.3</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>5000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL137828</td>\n",
" <td>CHEMBL137828</td>\n",
" <td>None</td>\n",
" <td>5.38</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>4200</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL139833</td>\n",
" <td>CHEMBL139833</td>\n",
" <td>None</td>\n",
" <td>5.68</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2100</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL845247</td>\n",
" <td>CHEMBL1134836</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL20705</td>\n",
" <td>CHEMBL20705</td>\n",
" <td>None</td>\n",
" <td>5.16</td>\n",
" <td>Ki</td>\n",
" <td>=</td>\n",
" <td>7000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL838680</td>\n",
" <td>CHEMBL1141181</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL85065</td>\n",
" <td>CHEMBL85065</td>\n",
" <td>NAPHTHYLCOMBRETASTATIN</td>\n",
" <td>5</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>10000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL837869</td>\n",
" <td>CHEMBL1145177</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL84903</td>\n",
" <td>CHEMBL84903</td>\n",
" <td>None</td>\n",
" <td>5.52</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>3000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL837869</td>\n",
" <td>CHEMBL1145177</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL175504</td>\n",
" <td>CHEMBL175504</td>\n",
" <td>None</td>\n",
" <td>5.7</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>2000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL837869</td>\n",
" <td>CHEMBL1145177</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>CHEMBL3394</td>\n",
" <td>Bos taurus</td>\n",
" <td>Tubulin beta chain</td>\n",
" <td>CHEMBL3706760</td>\n",
" <td>CHEMBL3706760</td>\n",
" <td>None</td>\n",
" <td>5.22</td>\n",
" <td>IC50</td>\n",
" <td>=</td>\n",
" <td>6000</td>\n",
" <td>nM</td>\n",
" <td>CHEMBL837869</td>\n",
" <td>CHEMBL1145177</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_chembl_id target_organism target_pref_name \\\n",
"0 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"1 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"2 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"3 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"4 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"5 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"6 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"7 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"8 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"9 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"10 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"11 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"12 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"13 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"14 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"15 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"16 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"17 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"18 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"19 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"20 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"21 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"22 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"23 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"24 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"25 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"26 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"27 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"28 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"29 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"30 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"31 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"32 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"33 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"34 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"35 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"36 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"37 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"38 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"39 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"40 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"41 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"42 CHEMBL3394 Bos taurus Tubulin beta chain \n",
"\n",
" parent_molecule_chembl_id molecule_chembl_id molecule_pref_name \\\n",
"0 CHEMBL67 CHEMBL67 COMBRETASTATIN A4 \n",
"1 CHEMBL107 CHEMBL107 COLCHICINE \n",
"2 CHEMBL143850 CHEMBL143850 None \n",
"3 CHEMBL143680 CHEMBL143680 None \n",
"4 CHEMBL136780 CHEMBL136780 None \n",
"5 CHEMBL436000 CHEMBL436000 None \n",
"6 CHEMBL156658 CHEMBL156658 None \n",
"7 CHEMBL107 CHEMBL107 COLCHICINE \n",
"8 CHEMBL79720 CHEMBL79720 None \n",
"9 CHEMBL67 CHEMBL67 COMBRETASTATIN A4 \n",
"10 CHEMBL1627442 CHEMBL1627442 None \n",
"11 CHEMBL1627436 CHEMBL1627436 None \n",
"12 CHEMBL1627445 CHEMBL1627445 None \n",
"13 CHEMBL265926 CHEMBL265926 None \n",
"14 CHEMBL189905 CHEMBL189905 None \n",
"15 CHEMBL1627439 CHEMBL1627439 None \n",
"16 CHEMBL1627838 CHEMBL1627838 None \n",
"17 CHEMBL299613 CHEMBL299613 2-METHOXYESTRADIOL \n",
"18 CHEMBL1628145 CHEMBL1628145 None \n",
"19 CHEMBL1627441 CHEMBL1627441 None \n",
"20 CHEMBL1627438 CHEMBL1627438 None \n",
"21 CHEMBL1627439 CHEMBL1627439 None \n",
"22 CHEMBL389126 CHEMBL389126 None \n",
"23 CHEMBL137829 CHEMBL137829 None \n",
"24 CHEMBL138698 CHEMBL138698 None \n",
"25 CHEMBL137924 CHEMBL137924 None \n",
"26 CHEMBL440529 CHEMBL440529 None \n",
"27 CHEMBL141271 CHEMBL141271 None \n",
"28 CHEMBL142135 CHEMBL142135 None \n",
"29 CHEMBL422643 CHEMBL422643 None \n",
"30 CHEMBL140774 CHEMBL140774 None \n",
"31 CHEMBL122397 CHEMBL122397 None \n",
"32 CHEMBL344614 CHEMBL344614 None \n",
"33 CHEMBL137897 CHEMBL137897 None \n",
"34 CHEMBL44918 CHEMBL44918 None \n",
"35 CHEMBL341946 CHEMBL341946 None \n",
"36 CHEMBL137828 CHEMBL137828 None \n",
"37 CHEMBL139833 CHEMBL139833 None \n",
"38 CHEMBL20705 CHEMBL20705 None \n",
"39 CHEMBL85065 CHEMBL85065 NAPHTHYLCOMBRETASTATIN \n",
"40 CHEMBL84903 CHEMBL84903 None \n",
"41 CHEMBL175504 CHEMBL175504 None \n",
"42 CHEMBL3706760 CHEMBL3706760 None \n",
"\n",
" pchembl_value standard_type standard_relation standard_value \\\n",
"0 6.75 Ki = 180 \n",
"1 6.11 Ki = 780 \n",
"2 6.2 Ki = 630 \n",
"3 5.43 Ki = 3690 \n",
"4 5.47 IC50 = 3400 \n",
"5 5.14 IC50 = 7200 \n",
"6 5.07 IC50 = 8600 \n",
"7 5.46 IC50 = 3500 \n",
"8 5.6 IC50 = 2500 \n",
"9 5.54 IC50 = 2900 \n",
"10 6.04 IC50 = 910 \n",
"11 5.03 IC50 = 9400 \n",
"12 5.52 IC50 = 3000 \n",
"13 5.32 IC50 = 4800 \n",
"14 5 IC50 = 10000 \n",
"15 5.11 IC50 = 7700 \n",
"16 5.07 IC50 = 8600 \n",
"17 5.54 IC50 = 2900 \n",
"18 5.62 IC50 = 2400 \n",
"19 5.32 IC50 = 4800 \n",
"20 5.38 IC50 = 4200 \n",
"21 5.96 IC50 = 1100 \n",
"22 5.31 IC50 = 4900 \n",
"23 5.5 IC50 = 3200 \n",
"24 5.16 IC50 = 6900 \n",
"25 5.44 IC50 = 3600 \n",
"26 5.38 IC50 = 4200 \n",
"27 5.35 IC50 = 4500 \n",
"28 5.28 IC50 = 5300 \n",
"29 5.24 IC50 = 5700 \n",
"30 5.5 IC50 = 3200 \n",
"31 5.44 IC50 = 3600 \n",
"32 5.31 IC50 = 4900 \n",
"33 5.44 IC50 = 3600 \n",
"34 5.4 IC50 = 4000 \n",
"35 5.3 IC50 = 5000 \n",
"36 5.38 IC50 = 4200 \n",
"37 5.68 IC50 = 2100 \n",
"38 5.16 Ki = 7000 \n",
"39 5 IC50 = 10000 \n",
"40 5.52 IC50 = 3000 \n",
"41 5.7 IC50 = 2000 \n",
"42 5.22 IC50 = 6000 \n",
"\n",
" standard_units assay_chembl_id document_chembl_id src_id \n",
"0 nM CHEMBL817537 CHEMBL1135048 1 \n",
"1 nM CHEMBL817537 CHEMBL1135048 1 \n",
"2 nM CHEMBL817537 CHEMBL1135048 1 \n",
"3 nM CHEMBL817537 CHEMBL1135048 1 \n",
"4 nM CHEMBL817535 CHEMBL1135048 1 \n",
"5 nM CHEMBL820774 CHEMBL1130240 1 \n",
"6 nM CHEMBL817536 CHEMBL1151998 1 \n",
"7 nM CHEMBL817536 CHEMBL1151998 1 \n",
"8 nM CHEMBL817536 CHEMBL1151998 1 \n",
"9 nM CHEMBL817536 CHEMBL1151998 1 \n",
"10 nM CHEMBL815873 CHEMBL1128390 1 \n",
"11 nM CHEMBL815873 CHEMBL1128390 1 \n",
"12 nM CHEMBL815873 CHEMBL1128390 1 \n",
"13 nM CHEMBL815873 CHEMBL1128390 1 \n",
"14 nM CHEMBL815873 CHEMBL1128390 1 \n",
"15 nM CHEMBL815873 CHEMBL1128390 1 \n",
"16 nM CHEMBL815873 CHEMBL1128390 1 \n",
"17 nM CHEMBL815873 CHEMBL1128390 1 \n",
"18 nM CHEMBL815873 CHEMBL1128390 1 \n",
"19 nM CHEMBL815873 CHEMBL1128390 1 \n",
"20 nM CHEMBL815873 CHEMBL1128390 1 \n",
"21 nM CHEMBL815873 CHEMBL1128390 1 \n",
"22 nM CHEMBL815873 CHEMBL1128390 1 \n",
"23 nM CHEMBL845247 CHEMBL1134836 1 \n",
"24 nM CHEMBL845247 CHEMBL1134836 1 \n",
"25 nM CHEMBL845247 CHEMBL1134836 1 \n",
"26 nM CHEMBL845247 CHEMBL1134836 1 \n",
"27 nM CHEMBL845247 CHEMBL1134836 1 \n",
"28 nM CHEMBL845247 CHEMBL1134836 1 \n",
"29 nM CHEMBL845247 CHEMBL1134836 1 \n",
"30 nM CHEMBL845247 CHEMBL1134836 1 \n",
"31 nM CHEMBL845247 CHEMBL1134836 1 \n",
"32 nM CHEMBL845247 CHEMBL1134836 1 \n",
"33 nM CHEMBL845247 CHEMBL1134836 1 \n",
"34 nM CHEMBL845247 CHEMBL1134836 1 \n",
"35 nM CHEMBL845247 CHEMBL1134836 1 \n",
"36 nM CHEMBL845247 CHEMBL1134836 1 \n",
"37 nM CHEMBL845247 CHEMBL1134836 1 \n",
"38 nM CHEMBL838680 CHEMBL1141181 1 \n",
"39 nM CHEMBL837869 CHEMBL1145177 1 \n",
"40 nM CHEMBL837869 CHEMBL1145177 1 \n",
"41 nM CHEMBL837869 CHEMBL1145177 1 \n",
"42 nM CHEMBL837869 CHEMBL1145177 1 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Keep only the columns of interest, so that the dataframe is a more manageable size to inspect:\n",
"columns_to_keep = [\n",
"    'target_chembl_id', 'target_organism', 'target_pref_name',\n",
"    'parent_molecule_chembl_id', 'molecule_chembl_id', 'molecule_pref_name',\n",
"    'pchembl_value', 'standard_type', 'standard_relation', 'standard_value', 'standard_units',\n",
"    'assay_chembl_id', 'document_chembl_id', 'src_id',\n",
"]\n",
"act_df = act_df[columns_to_keep]\n",
"act_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Extract the list of compounds from the previous dataframe ('act_df'), and call the 'molecule' API to find their molecular properties etc, so that this list can be refined"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 40 compounds initially identified as active on the known targets. e.g.\n",
"['CHEMBL1627436', 'CHEMBL299613']\n"
]
}
],
"source": [
"###############################\n",
"#First find the list of compounds that are within the act_df dataframe:\n",
"###############################\n",
"#unique() keeps the ids in order of first appearance in act_df, so the list (and the example printed below) is the same on every run;\n",
"#list(set(...)) gives an order that can change each time the kernel is restarted\n",
"cmpd_chembl_ids = act_df['molecule_chembl_id'].unique().tolist()\n",
"print(\"There are {} compounds initially identified as active on the known targets. e.g.\".format(len(cmpd_chembl_ids)))\n",
"print(cmpd_chembl_ids[0:2])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is the url string that calls the 'Molecule' API with the specified query\n",
"https://www.ebi.ac.uk/chembl/api/data/molecule.json?molecule_chembl_id__in=CHEMBL1627436,CHEMBL299613,CHEMBL137897,CHEMBL141271,CHEMBL44918,CHEMBL189905,CHEMBL85065,CHEMBL1627439,CHEMBL436000,CHEMBL175504,CHEMBL107,CHEMBL3706760,CHEMBL139833,CHEMBL138698,CHEMBL389126,CHEMBL143850,CHEMBL140774,CHEMBL79720,CHEMBL440529,CHEMBL137829,CHEMBL422643,CHEMBL143680,CHEMBL156658,CHEMBL67,CHEMBL1627442,CHEMBL136780,CHEMBL142135,CHEMBL137828,CHEMBL1627441,CHEMBL341946,CHEMBL20705,CHEMBL265926,CHEMBL1627838,CHEMBL1627445,CHEMBL84903,CHEMBL1627438,CHEMBL1628145,CHEMBL137924,CHEMBL122397,CHEMBL344614&limit=100\n",
"\n",
"These are the available columns for the Molecule API:\n",
"Index(['atc_classifications', 'availability_type', 'biotherapeutic',\n",
" 'black_box_warning', 'chebi_par_id', 'chirality', 'cross_references',\n",
" 'dosed_ingredient', 'first_approval', 'first_in_class', 'helm_notation',\n",
" 'indication_class', 'inorganic_flag', 'max_phase', 'molecule_chembl_id',\n",
" 'molecule_hierarchy', 'molecule_properties', 'molecule_structures',\n",
" 'molecule_synonyms', 'molecule_type', 'natural_product', 'oral',\n",
" 'parenteral', 'polymer_flag', 'pref_name', 'prodrug', 'structure_type',\n",
" 'therapeutic_flag', 'topical', 'usan_stem', 'usan_stem_definition',\n",
" 'usan_substem', 'usan_year', 'withdrawn_class', 'withdrawn_country',\n",
" 'withdrawn_flag', 'withdrawn_reason', 'withdrawn_year'],\n",
" dtype='object')\n"
]
}
],
"source": [
"###############################\n",
"#For the identified compounds, extract their molecular properties and other information from the 'molecule' ChEMBL API\n",
"###############################\n",
"\n",
"#Specify the input parameters:\n",
"#Amend the format of the compound ids into one comma-separated text string that is suitable for the API call.\n",
"#The name is rebound from a list to a string here, so only join while it is still a list:\n",
"#re-running this cell would otherwise join the individual characters of the string and build a useless query.\n",
"if not isinstance(cmpd_chembl_ids, str):\n",
"    cmpd_chembl_ids = \",\".join(cmpd_chembl_ids)\n",
"limit = 100 #Limit the number of records pulled back for each url call\n",
"request_timeout = 60 #Seconds to wait for the server on each call, so that a stalled connection cannot hang the notebook\n",
"\n",
"###############################\n",
"#Set up the call to the ChEMBL 'molecule' API\n",
"#Remember that there is a limit to the number of records returned in any one API call (default is 20 records, maximum is 1000 records)\n",
"#So need to iterate over several pages of records to gather all relevant information together!\n",
"url_stem = \"https://www.ebi.ac.uk\" #This is the stem of the url\n",
"url_full_string = url_stem + \"/chembl/api/data/molecule.json?molecule_chembl_id__in={}&limit={}\".format(cmpd_chembl_ids, limit) #This is the full url with the specified input parameters\n",
"response = requests.get(url_full_string, timeout=request_timeout) #This calls the information back from the API using the 'requests' module\n",
"response.raise_for_status() #Stop here with a clear HTTP error rather than failing later on a missing key\n",
"url_full = response.json() #Convert the response to json format (a python dictionary)\n",
"url_molecules = list(url_full['molecules']) #This is a list of the results for molecules (first page)\n",
"\n",
"#This 'while' loop iterates over several pages of records (if required), and collates the list of results\n",
"#'page_meta' -> 'next' holds the path of the next page and is empty once the last page has been read\n",
"while url_full['page_meta']['next']:\n",
"    response = requests.get(url_stem + url_full['page_meta']['next'], timeout=request_timeout)\n",
"    response.raise_for_status()\n",
"    url_full = response.json()\n",
"    url_molecules.extend(url_full['molecules']) #Add this page of results to the list collected so far\n",
"\n",
"#Convert the list of results into a Pandas dataframe:\n",
"mol_df = pd.DataFrame(url_molecules)\n",
"\n",
"#Print out some useful information:\n",
"print(\"This is the url string that calls the 'Molecule' API with the specified query\\n{}\".format(url_full_string) )\n",
"print(\"\\nThese are the available columns for the Molecule API:\\n{}\".format(mol_df.columns))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>molecule_chembl_id</th>\n",
" <th>pref_name</th>\n",
" <th>molecule_hierarchy</th>\n",
" <th>molecule_properties</th>\n",
" <th>max_phase</th>\n",
" <th>parent_chembl_id</th>\n",
" <th>acd_logd</th>\n",
" <th>acd_logp</th>\n",
" <th>acd_most_apka</th>\n",
" <th>acd_most_bpka</th>\n",
" <th>alogp</th>\n",
" <th>hba</th>\n",
" <th>hbd</th>\n",
" <th>mw_freebase</th>\n",
" <th>full_mwt</th>\n",
" <th>num_ro5_violations</th>\n",
" <th>psa</th>\n",
" <th>heavy_atoms</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL67</td>\n",
" <td>COMBRETASTATIN A4</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL67', 'parent_che...</td>\n",
" <td>{'acd_logd': '2.92', 'acd_logp': '2.92', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL67</td>\n",
" <td>2.92</td>\n",
" <td>2.92</td>\n",
" <td>9.65</td>\n",
" <td>None</td>\n",
" <td>3.6</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>316.35</td>\n",
" <td>316.35</td>\n",
" <td>0</td>\n",
" <td>57.15</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL107</td>\n",
" <td>COLCHICINE</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL107', 'parent_ch...</td>\n",
" <td>{'acd_logd': '1.07', 'acd_logp': '1.07', 'acd_...</td>\n",
" <td>4</td>\n",
" <td>CHEMBL107</td>\n",
" <td>1.07</td>\n",
" <td>1.07</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2.87</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>399.44</td>\n",
" <td>399.44</td>\n",
" <td>0</td>\n",
" <td>83.09</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL20705</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL20705', 'parent_...</td>\n",
" <td>{'acd_logd': '3.06', 'acd_logp': '3.09', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL20705</td>\n",
" <td>3.06</td>\n",
" <td>3.09</td>\n",
" <td>8.43</td>\n",
" <td>None</td>\n",
" <td>3.63</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>336.8</td>\n",
" <td>336.8</td>\n",
" <td>0</td>\n",
" <td>71.19</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL44918</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL44918', 'parent_...</td>\n",
" <td>{'acd_logd': '2.82', 'acd_logp': '2.83', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL44918</td>\n",
" <td>2.82</td>\n",
" <td>2.83</td>\n",
" <td>9.34</td>\n",
" <td>3.66</td>\n",
" <td>3.53</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>396.42</td>\n",
" <td>396.42</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL299613</td>\n",
" <td>2-METHOXYESTRADIOL</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL299613', 'parent...</td>\n",
" <td>{'acd_logd': '3.84', 'acd_logp': '3.84', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL299613</td>\n",
" <td>3.84</td>\n",
" <td>3.84</td>\n",
" <td>10.29</td>\n",
" <td>None</td>\n",
" <td>3.62</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>302.41</td>\n",
" <td>302.41</td>\n",
" <td>0</td>\n",
" <td>49.69</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CHEMBL79720</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL79720', 'parent_...</td>\n",
" <td>{'acd_logd': '1.01', 'acd_logp': '1.01', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL79720</td>\n",
" <td>1.01</td>\n",
" <td>1.01</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2.75</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>302.33</td>\n",
" <td>302.33</td>\n",
" <td>0</td>\n",
" <td>53.99</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>CHEMBL84903</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL84903', 'parent_...</td>\n",
" <td>{'acd_logd': '4.64', 'acd_logp': '4.65', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL84903</td>\n",
" <td>4.64</td>\n",
" <td>4.65</td>\n",
" <td>9.69</td>\n",
" <td>None</td>\n",
" <td>4.72</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>276.33</td>\n",
" <td>276.33</td>\n",
" <td>0</td>\n",
" <td>29.46</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>CHEMBL85065</td>\n",
" <td>NAPHTHYLCOMBRETASTATIN</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL85065', 'parent_...</td>\n",
" <td>{'acd_logd': '4.53', 'acd_logp': '4.53', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL85065</td>\n",
" <td>4.53</td>\n",
" <td>4.53</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5.04</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>320.39</td>\n",
" <td>320.39</td>\n",
" <td>1</td>\n",
" <td>27.69</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CHEMBL122397</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL122397', 'parent...</td>\n",
" <td>{'acd_logd': '3.59', 'acd_logp': '3.61', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL122397</td>\n",
" <td>3.59</td>\n",
" <td>3.61</td>\n",
" <td>9.39</td>\n",
" <td>3.69</td>\n",
" <td>3.73</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>358.44</td>\n",
" <td>358.44</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>CHEMBL436000</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL436000', 'parent...</td>\n",
" <td>{'acd_logd': '3.05', 'acd_logp': '3.05', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL436000</td>\n",
" <td>3.05</td>\n",
" <td>3.05</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3.26</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>284.4</td>\n",
" <td>284.4</td>\n",
" <td>0</td>\n",
" <td>37.3</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>CHEMBL136780</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL136780', 'parent...</td>\n",
" <td>{'acd_logd': '1.39', 'acd_logp': '1.39', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL136780</td>\n",
" <td>1.39</td>\n",
" <td>1.39</td>\n",
" <td>None</td>\n",
" <td>4.51</td>\n",
" <td>2.84</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>385.42</td>\n",
" <td>385.42</td>\n",
" <td>0</td>\n",
" <td>95.61</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>CHEMBL422643</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL422643', 'parent...</td>\n",
" <td>{'acd_logd': '2.24', 'acd_logp': '2.24', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL422643</td>\n",
" <td>2.24</td>\n",
" <td>2.24</td>\n",
" <td>10.29</td>\n",
" <td>2.63</td>\n",
" <td>4.07</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>376.46</td>\n",
" <td>376.46</td>\n",
" <td>0</td>\n",
" <td>81.59</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CHEMBL344614</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL344614', 'parent...</td>\n",
" <td>{'acd_logd': '1.9', 'acd_logp': '1.9', 'acd_mo...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL344614</td>\n",
" <td>1.9</td>\n",
" <td>1.9</td>\n",
" <td>10.29</td>\n",
" <td>2.61</td>\n",
" <td>3.68</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>362.43</td>\n",
" <td>362.43</td>\n",
" <td>0</td>\n",
" <td>81.59</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>CHEMBL138698</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL138698', 'parent...</td>\n",
" <td>{'acd_logd': '2.69', 'acd_logp': '2.69', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL138698</td>\n",
" <td>2.69</td>\n",
" <td>2.69</td>\n",
" <td>10.3</td>\n",
" <td>2.62</td>\n",
" <td>3.68</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>362.43</td>\n",
" <td>362.43</td>\n",
" <td>0</td>\n",
" <td>81.59</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>CHEMBL140774</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL140774', 'parent...</td>\n",
" <td>{'acd_logd': '2.52', 'acd_logp': '2.54', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL140774</td>\n",
" <td>2.52</td>\n",
" <td>2.54</td>\n",
" <td>9.36</td>\n",
" <td>3.67</td>\n",
" <td>3.35</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>364.4</td>\n",
" <td>364.4</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>CHEMBL440529</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL440529', 'parent...</td>\n",
" <td>{'acd_logd': '2.61', 'acd_logp': '2.63', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL440529</td>\n",
" <td>2.61</td>\n",
" <td>2.63</td>\n",
" <td>9.35</td>\n",
" <td>3.67</td>\n",
" <td>3.4</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>408.45</td>\n",
" <td>408.45</td>\n",
" <td>0</td>\n",
" <td>111.05</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CHEMBL139833</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL139833', 'parent...</td>\n",
" <td>{'acd_logd': '0.89', 'acd_logp': '0.95', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL139833</td>\n",
" <td>0.89</td>\n",
" <td>0.95</td>\n",
" <td>9.35</td>\n",
" <td>6.52</td>\n",
" <td>1.48</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>387.44</td>\n",
" <td>387.44</td>\n",
" <td>0</td>\n",
" <td>114.29</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>CHEMBL141271</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL141271', 'parent...</td>\n",
" <td>{'acd_logd': '2.07', 'acd_logp': '2.09', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL141271</td>\n",
" <td>2.07</td>\n",
" <td>2.09</td>\n",
" <td>9.35</td>\n",
" <td>3.67</td>\n",
" <td>3.09</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>394.43</td>\n",
" <td>394.43</td>\n",
" <td>1</td>\n",
" <td>122.05</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>CHEMBL142135</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL142135', 'parent...</td>\n",
" <td>{'acd_logd': '3.4', 'acd_logp': '3.42', 'acd_m...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL142135</td>\n",
" <td>3.4</td>\n",
" <td>3.42</td>\n",
" <td>9.32</td>\n",
" <td>3.65</td>\n",
" <td>3.7</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>404.47</td>\n",
" <td>404.47</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>CHEMBL137897</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL137897', 'parent...</td>\n",
" <td>{'acd_logd': '1.49', 'acd_logp': '1.5', 'acd_m...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL137897</td>\n",
" <td>1.49</td>\n",
" <td>1.5</td>\n",
" <td>9.31</td>\n",
" <td>5.27</td>\n",
" <td>2.78</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>379.42</td>\n",
" <td>379.42</td>\n",
" <td>0</td>\n",
" <td>114.71</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>CHEMBL341946</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL341946', 'parent...</td>\n",
" <td>{'acd_logd': '3.07', 'acd_logp': '3.09', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL341946</td>\n",
" <td>3.07</td>\n",
" <td>3.09</td>\n",
" <td>9.36</td>\n",
" <td>3.67</td>\n",
" <td>3.78</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>392.46</td>\n",
" <td>392.46</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>CHEMBL137924</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL137924', 'parent...</td>\n",
" <td>{'acd_logd': '2.73', 'acd_logp': '2.74', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL137924</td>\n",
" <td>2.73</td>\n",
" <td>2.74</td>\n",
" <td>9.34</td>\n",
" <td>3.66</td>\n",
" <td>3.39</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>378.43</td>\n",
" <td>378.43</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>CHEMBL137828</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL137828', 'parent...</td>\n",
" <td>{'acd_logd': '3.59', 'acd_logp': '3.61', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL137828</td>\n",
" <td>3.59</td>\n",
" <td>3.61</td>\n",
" <td>9.34</td>\n",
" <td>3.66</td>\n",
" <td>4.15</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>457.32</td>\n",
" <td>457.32</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>CHEMBL137829</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL137829', 'parent...</td>\n",
" <td>{'acd_logd': '3.31', 'acd_logp': '3.33', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL137829</td>\n",
" <td>3.31</td>\n",
" <td>3.33</td>\n",
" <td>9.34</td>\n",
" <td>3.66</td>\n",
" <td>4.04</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>412.87</td>\n",
" <td>412.87</td>\n",
" <td>0</td>\n",
" <td>101.82</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>CHEMBL143680</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL143680', 'parent...</td>\n",
" <td>{'acd_logd': '2.33', 'acd_logp': '2.33', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL143680</td>\n",
" <td>2.33</td>\n",
" <td>2.33</td>\n",
" <td>None</td>\n",
" <td>4.3</td>\n",
" <td>3.72</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>366.42</td>\n",
" <td>366.42</td>\n",
" <td>0</td>\n",
" <td>54.21</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>CHEMBL143850</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL143850', 'parent...</td>\n",
" <td>{'acd_logd': '2.33', 'acd_logp': '2.33', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL143850</td>\n",
" <td>2.33</td>\n",
" <td>2.33</td>\n",
" <td>None</td>\n",
" <td>4.3</td>\n",
" <td>3.72</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>366.42</td>\n",
" <td>366.42</td>\n",
" <td>0</td>\n",
" <td>54.21</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>CHEMBL156658</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL156658', 'parent...</td>\n",
" <td>{'acd_logd': '0.8', 'acd_logp': '0.8', 'acd_mo...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL156658</td>\n",
" <td>0.8</td>\n",
" <td>0.8</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2.87</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>330.38</td>\n",
" <td>330.38</td>\n",
" <td>0</td>\n",
" <td>53.99</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>CHEMBL175504</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL175504', 'parent...</td>\n",
" <td>{'acd_logd': '3.65', 'acd_logp': '3.65', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL175504</td>\n",
" <td>3.65</td>\n",
" <td>3.65</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>4.37</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>323.39</td>\n",
" <td>323.39</td>\n",
" <td>0</td>\n",
" <td>32.62</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>CHEMBL189905</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL189905', 'parent...</td>\n",
" <td>{'acd_logd': '5.1', 'acd_logp': '5.1', 'acd_mo...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL189905</td>\n",
" <td>5.1</td>\n",
" <td>5.1</td>\n",
" <td>9.32</td>\n",
" <td>None</td>\n",
" <td>4.72</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>332.51</td>\n",
" <td>332.51</td>\n",
" <td>0</td>\n",
" <td>40.46</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>CHEMBL389126</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL389126', 'parent...</td>\n",
" <td>{'acd_logd': '5.65', 'acd_logp': '5.65', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL389126</td>\n",
" <td>5.65</td>\n",
" <td>5.65</td>\n",
" <td>10.81</td>\n",
" <td>None</td>\n",
" <td>4.56</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>314.47</td>\n",
" <td>314.47</td>\n",
" <td>0</td>\n",
" <td>40.46</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>CHEMBL265926</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL265926', 'parent...</td>\n",
" <td>{'acd_logd': '5.2', 'acd_logp': '5.21', 'acd_m...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL265926</td>\n",
" <td>5.2</td>\n",
" <td>5.21</td>\n",
" <td>8.84</td>\n",
" <td>None</td>\n",
" <td>4.21</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>398.28</td>\n",
" <td>398.28</td>\n",
" <td>0</td>\n",
" <td>40.46</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>CHEMBL1627436</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627436', 'paren...</td>\n",
" <td>{'acd_logd': '5.97', 'acd_logp': '5.97', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627436</td>\n",
" <td>5.97</td>\n",
" <td>5.97</td>\n",
" <td>10.18</td>\n",
" <td>None</td>\n",
" <td>5.03</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>326.48</td>\n",
" <td>326.48</td>\n",
" <td>1</td>\n",
" <td>40.46</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>CHEMBL1627438</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627438', 'paren...</td>\n",
" <td>{'acd_logd': '4.86', 'acd_logp': '4.86', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627438</td>\n",
" <td>4.86</td>\n",
" <td>4.86</td>\n",
" <td>9.87</td>\n",
" <td>None</td>\n",
" <td>4.4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>330.47</td>\n",
" <td>330.47</td>\n",
" <td>0</td>\n",
" <td>49.69</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>CHEMBL1627439</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627439', 'paren...</td>\n",
" <td>{'acd_logd': '5.55', 'acd_logp': '5.55', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627439</td>\n",
" <td>5.55</td>\n",
" <td>5.55</td>\n",
" <td>10.27</td>\n",
" <td>None</td>\n",
" <td>4.64</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>312.45</td>\n",
" <td>312.45</td>\n",
" <td>0</td>\n",
" <td>40.46</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>CHEMBL1627441</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627441', 'paren...</td>\n",
" <td>{'acd_logd': '4.71', 'acd_logp': '4.71', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627441</td>\n",
" <td>4.71</td>\n",
" <td>4.71</td>\n",
" <td>10.2</td>\n",
" <td>None</td>\n",
" <td>4.4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>330.47</td>\n",
" <td>330.47</td>\n",
" <td>0</td>\n",
" <td>49.69</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>CHEMBL1627442</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627442', 'paren...</td>\n",
" <td>{'acd_logd': '4.35', 'acd_logp': '4.35', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627442</td>\n",
" <td>4.35</td>\n",
" <td>4.35</td>\n",
" <td>10.42</td>\n",
" <td>None</td>\n",
" <td>4.01</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>316.44</td>\n",
" <td>316.44</td>\n",
" <td>0</td>\n",
" <td>49.69</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>CHEMBL1627445</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627445', 'paren...</td>\n",
" <td>{'acd_logd': '4.78', 'acd_logp': '4.79', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627445</td>\n",
" <td>4.78</td>\n",
" <td>4.79</td>\n",
" <td>11.37</td>\n",
" <td>5.94</td>\n",
" <td>4.04</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>315.46</td>\n",
" <td>315.46</td>\n",
" <td>0</td>\n",
" <td>52.49</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>CHEMBL1627838</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1627838', 'paren...</td>\n",
" <td>{'acd_logd': '6.06', 'acd_logp': '6.06', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1627838</td>\n",
" <td>6.06</td>\n",
" <td>6.06</td>\n",
" <td>10.16</td>\n",
" <td>None</td>\n",
" <td>5.03</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>326.48</td>\n",
" <td>326.48</td>\n",
" <td>1</td>\n",
" <td>40.46</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>CHEMBL1628145</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL1628145', 'paren...</td>\n",
" <td>{'acd_logd': '5.04', 'acd_logp': '5.04', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL1628145</td>\n",
" <td>5.04</td>\n",
" <td>5.04</td>\n",
" <td>10.06</td>\n",
" <td>None</td>\n",
" <td>4.25</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>298.43</td>\n",
" <td>298.43</td>\n",
" <td>0</td>\n",
" <td>40.46</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>CHEMBL3706760</td>\n",
" <td>None</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL3706760', 'paren...</td>\n",
" <td>{'acd_logd': '5.15', 'acd_logp': '5.15', 'acd_...</td>\n",
" <td>0</td>\n",
" <td>CHEMBL3706760</td>\n",
" <td>5.15</td>\n",
" <td>5.15</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5.53</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>373.45</td>\n",
" <td>373.45</td>\n",
" <td>1</td>\n",
" <td>32.62</td>\n",
" <td>28</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" molecule_chembl_id pref_name \\\n",
"0 CHEMBL67 COMBRETASTATIN A4 \n",
"1 CHEMBL107 COLCHICINE \n",
"2 CHEMBL20705 None \n",
"3 CHEMBL44918 None \n",
"4 CHEMBL299613 2-METHOXYESTRADIOL \n",
"5 CHEMBL79720 None \n",
"6 CHEMBL84903 None \n",
"7 CHEMBL85065 NAPHTHYLCOMBRETASTATIN \n",
"8 CHEMBL122397 None \n",
"9 CHEMBL436000 None \n",
"10 CHEMBL136780 None \n",
"11 CHEMBL422643 None \n",
"12 CHEMBL344614 None \n",
"13 CHEMBL138698 None \n",
"14 CHEMBL140774 None \n",
"15 CHEMBL440529 None \n",
"16 CHEMBL139833 None \n",
"17 CHEMBL141271 None \n",
"18 CHEMBL142135 None \n",
"19 CHEMBL137897 None \n",
"20 CHEMBL341946 None \n",
"21 CHEMBL137924 None \n",
"22 CHEMBL137828 None \n",
"23 CHEMBL137829 None \n",
"24 CHEMBL143680 None \n",
"25 CHEMBL143850 None \n",
"26 CHEMBL156658 None \n",
"27 CHEMBL175504 None \n",
"28 CHEMBL189905 None \n",
"29 CHEMBL389126 None \n",
"30 CHEMBL265926 None \n",
"31 CHEMBL1627436 None \n",
"32 CHEMBL1627438 None \n",
"33 CHEMBL1627439 None \n",
"34 CHEMBL1627441 None \n",
"35 CHEMBL1627442 None \n",
"36 CHEMBL1627445 None \n",
"37 CHEMBL1627838 None \n",
"38 CHEMBL1628145 None \n",
"39 CHEMBL3706760 None \n",
"\n",
" molecule_hierarchy \\\n",
"0 {'molecule_chembl_id': 'CHEMBL67', 'parent_che... \n",
"1 {'molecule_chembl_id': 'CHEMBL107', 'parent_ch... \n",
"2 {'molecule_chembl_id': 'CHEMBL20705', 'parent_... \n",
"3 {'molecule_chembl_id': 'CHEMBL44918', 'parent_... \n",
"4 {'molecule_chembl_id': 'CHEMBL299613', 'parent... \n",
"5 {'molecule_chembl_id': 'CHEMBL79720', 'parent_... \n",
"6 {'molecule_chembl_id': 'CHEMBL84903', 'parent_... \n",
"7 {'molecule_chembl_id': 'CHEMBL85065', 'parent_... \n",
"8 {'molecule_chembl_id': 'CHEMBL122397', 'parent... \n",
"9 {'molecule_chembl_id': 'CHEMBL436000', 'parent... \n",
"10 {'molecule_chembl_id': 'CHEMBL136780', 'parent... \n",
"11 {'molecule_chembl_id': 'CHEMBL422643', 'parent... \n",
"12 {'molecule_chembl_id': 'CHEMBL344614', 'parent... \n",
"13 {'molecule_chembl_id': 'CHEMBL138698', 'parent... \n",
"14 {'molecule_chembl_id': 'CHEMBL140774', 'parent... \n",
"15 {'molecule_chembl_id': 'CHEMBL440529', 'parent... \n",
"16 {'molecule_chembl_id': 'CHEMBL139833', 'parent... \n",
"17 {'molecule_chembl_id': 'CHEMBL141271', 'parent... \n",
"18 {'molecule_chembl_id': 'CHEMBL142135', 'parent... \n",
"19 {'molecule_chembl_id': 'CHEMBL137897', 'parent... \n",
"20 {'molecule_chembl_id': 'CHEMBL341946', 'parent... \n",
"21 {'molecule_chembl_id': 'CHEMBL137924', 'parent... \n",
"22 {'molecule_chembl_id': 'CHEMBL137828', 'parent... \n",
"23 {'molecule_chembl_id': 'CHEMBL137829', 'parent... \n",
"24 {'molecule_chembl_id': 'CHEMBL143680', 'parent... \n",
"25 {'molecule_chembl_id': 'CHEMBL143850', 'parent... \n",
"26 {'molecule_chembl_id': 'CHEMBL156658', 'parent... \n",
"27 {'molecule_chembl_id': 'CHEMBL175504', 'parent... \n",
"28 {'molecule_chembl_id': 'CHEMBL189905', 'parent... \n",
"29 {'molecule_chembl_id': 'CHEMBL389126', 'parent... \n",
"30 {'molecule_chembl_id': 'CHEMBL265926', 'parent... \n",
"31 {'molecule_chembl_id': 'CHEMBL1627436', 'paren... \n",
"32 {'molecule_chembl_id': 'CHEMBL1627438', 'paren... \n",
"33 {'molecule_chembl_id': 'CHEMBL1627439', 'paren... \n",
"34 {'molecule_chembl_id': 'CHEMBL1627441', 'paren... \n",
"35 {'molecule_chembl_id': 'CHEMBL1627442', 'paren... \n",
"36 {'molecule_chembl_id': 'CHEMBL1627445', 'paren... \n",
"37 {'molecule_chembl_id': 'CHEMBL1627838', 'paren... \n",
"38 {'molecule_chembl_id': 'CHEMBL1628145', 'paren... \n",
"39 {'molecule_chembl_id': 'CHEMBL3706760', 'paren... \n",
"\n",
" molecule_properties max_phase \\\n",
"0 {'acd_logd': '2.92', 'acd_logp': '2.92', 'acd_... 0 \n",
"1 {'acd_logd': '1.07', 'acd_logp': '1.07', 'acd_... 4 \n",
"2 {'acd_logd': '3.06', 'acd_logp': '3.09', 'acd_... 0 \n",
"3 {'acd_logd': '2.82', 'acd_logp': '2.83', 'acd_... 0 \n",
"4 {'acd_logd': '3.84', 'acd_logp': '3.84', 'acd_... 0 \n",
"5 {'acd_logd': '1.01', 'acd_logp': '1.01', 'acd_... 0 \n",
"6 {'acd_logd': '4.64', 'acd_logp': '4.65', 'acd_... 0 \n",
"7 {'acd_logd': '4.53', 'acd_logp': '4.53', 'acd_... 0 \n",
"8 {'acd_logd': '3.59', 'acd_logp': '3.61', 'acd_... 0 \n",
"9 {'acd_logd': '3.05', 'acd_logp': '3.05', 'acd_... 0 \n",
"10 {'acd_logd': '1.39', 'acd_logp': '1.39', 'acd_... 0 \n",
"11 {'acd_logd': '2.24', 'acd_logp': '2.24', 'acd_... 0 \n",
"12 {'acd_logd': '1.9', 'acd_logp': '1.9', 'acd_mo... 0 \n",
"13 {'acd_logd': '2.69', 'acd_logp': '2.69', 'acd_... 0 \n",
"14 {'acd_logd': '2.52', 'acd_logp': '2.54', 'acd_... 0 \n",
"15 {'acd_logd': '2.61', 'acd_logp': '2.63', 'acd_... 0 \n",
"16 {'acd_logd': '0.89', 'acd_logp': '0.95', 'acd_... 0 \n",
"17 {'acd_logd': '2.07', 'acd_logp': '2.09', 'acd_... 0 \n",
"18 {'acd_logd': '3.4', 'acd_logp': '3.42', 'acd_m... 0 \n",
"19 {'acd_logd': '1.49', 'acd_logp': '1.5', 'acd_m... 0 \n",
"20 {'acd_logd': '3.07', 'acd_logp': '3.09', 'acd_... 0 \n",
"21 {'acd_logd': '2.73', 'acd_logp': '2.74', 'acd_... 0 \n",
"22 {'acd_logd': '3.59', 'acd_logp': '3.61', 'acd_... 0 \n",
"23 {'acd_logd': '3.31', 'acd_logp': '3.33', 'acd_... 0 \n",
"24 {'acd_logd': '2.33', 'acd_logp': '2.33', 'acd_... 0 \n",
"25 {'acd_logd': '2.33', 'acd_logp': '2.33', 'acd_... 0 \n",
"26 {'acd_logd': '0.8', 'acd_logp': '0.8', 'acd_mo... 0 \n",
"27 {'acd_logd': '3.65', 'acd_logp': '3.65', 'acd_... 0 \n",
"28 {'acd_logd': '5.1', 'acd_logp': '5.1', 'acd_mo... 0 \n",
"29 {'acd_logd': '5.65', 'acd_logp': '5.65', 'acd_... 0 \n",
"30 {'acd_logd': '5.2', 'acd_logp': '5.21', 'acd_m... 0 \n",
"31 {'acd_logd': '5.97', 'acd_logp': '5.97', 'acd_... 0 \n",
"32 {'acd_logd': '4.86', 'acd_logp': '4.86', 'acd_... 0 \n",
"33 {'acd_logd': '5.55', 'acd_logp': '5.55', 'acd_... 0 \n",
"34 {'acd_logd': '4.71', 'acd_logp': '4.71', 'acd_... 0 \n",
"35 {'acd_logd': '4.35', 'acd_logp': '4.35', 'acd_... 0 \n",
"36 {'acd_logd': '4.78', 'acd_logp': '4.79', 'acd_... 0 \n",
"37 {'acd_logd': '6.06', 'acd_logp': '6.06', 'acd_... 0 \n",
"38 {'acd_logd': '5.04', 'acd_logp': '5.04', 'acd_... 0 \n",
"39 {'acd_logd': '5.15', 'acd_logp': '5.15', 'acd_... 0 \n",
"\n",
" parent_chembl_id acd_logd acd_logp acd_most_apka acd_most_bpka alogp hba \\\n",
"0 CHEMBL67 2.92 2.92 9.65 None 3.6 5 \n",
"1 CHEMBL107 1.07 1.07 None None 2.87 6 \n",
"2 CHEMBL20705 3.06 3.09 8.43 None 3.63 3 \n",
"3 CHEMBL44918 2.82 2.83 9.34 3.66 3.53 5 \n",
"4 CHEMBL299613 3.84 3.84 10.29 None 3.62 3 \n",
"5 CHEMBL79720 1.01 1.01 None None 2.75 5 \n",
"6 CHEMBL84903 4.64 4.65 9.69 None 4.72 2 \n",
"7 CHEMBL85065 4.53 4.53 None None 5.04 3 \n",
"8 CHEMBL122397 3.59 3.61 9.39 3.69 3.73 5 \n",
"9 CHEMBL436000 3.05 3.05 None None 3.26 2 \n",
"10 CHEMBL136780 1.39 1.39 None 4.51 2.84 7 \n",
"11 CHEMBL422643 2.24 2.24 10.29 2.63 4.07 4 \n",
"12 CHEMBL344614 1.9 1.9 10.29 2.61 3.68 4 \n",
"13 CHEMBL138698 2.69 2.69 10.3 2.62 3.68 4 \n",
"14 CHEMBL140774 2.52 2.54 9.36 3.67 3.35 5 \n",
"15 CHEMBL440529 2.61 2.63 9.35 3.67 3.4 6 \n",
"16 CHEMBL139833 0.89 0.95 9.35 6.52 1.48 7 \n",
"17 CHEMBL141271 2.07 2.09 9.35 3.67 3.09 6 \n",
"18 CHEMBL142135 3.4 3.42 9.32 3.65 3.7 5 \n",
"19 CHEMBL137897 1.49 1.5 9.31 5.27 2.78 6 \n",
"20 CHEMBL341946 3.07 3.09 9.36 3.67 3.78 5 \n",
"21 CHEMBL137924 2.73 2.74 9.34 3.66 3.39 5 \n",
"22 CHEMBL137828 3.59 3.61 9.34 3.66 4.15 5 \n",
"23 CHEMBL137829 3.31 3.33 9.34 3.66 4.04 5 \n",
"24 CHEMBL143680 2.33 2.33 None 4.3 3.72 6 \n",
"25 CHEMBL143850 2.33 2.33 None 4.3 3.72 6 \n",
"26 CHEMBL156658 0.8 0.8 None None 2.87 5 \n",
"27 CHEMBL175504 3.65 3.65 None None 4.37 4 \n",
"28 CHEMBL189905 5.1 5.1 9.32 None 4.72 3 \n",
"29 CHEMBL389126 5.65 5.65 10.81 None 4.56 2 \n",
"30 CHEMBL265926 5.2 5.21 8.84 None 4.21 2 \n",
"31 CHEMBL1627436 5.97 5.97 10.18 None 5.03 2 \n",
"32 CHEMBL1627438 4.86 4.86 9.87 None 4.4 3 \n",
"33 CHEMBL1627439 5.55 5.55 10.27 None 4.64 2 \n",
"34 CHEMBL1627441 4.71 4.71 10.2 None 4.4 3 \n",
"35 CHEMBL1627442 4.35 4.35 10.42 None 4.01 3 \n",
"36 CHEMBL1627445 4.78 4.79 11.37 5.94 4.04 3 \n",
"37 CHEMBL1627838 6.06 6.06 10.16 None 5.03 2 \n",
"38 CHEMBL1628145 5.04 5.04 10.06 None 4.25 2 \n",
"39 CHEMBL3706760 5.15 5.15 None None 5.53 4 \n",
"\n",
" hbd mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n",
"0 1 316.35 316.35 0 57.15 23 \n",
"1 1 399.44 399.44 0 83.09 29 \n",
"2 2 336.8 336.8 0 71.19 22 \n",
"3 5 396.42 396.42 0 101.82 29 \n",
"4 2 302.41 302.41 0 49.69 22 \n",
"5 0 302.33 302.33 0 53.99 22 \n",
"6 1 276.33 276.33 0 29.46 21 \n",
"7 0 320.39 320.39 1 27.69 24 \n",
"8 5 358.44 358.44 0 101.82 26 \n",
"9 1 284.4 284.4 0 37.3 21 \n",
"10 1 385.42 385.42 0 95.61 28 \n",
"11 4 376.46 376.46 0 81.59 28 \n",
"12 4 362.43 362.43 0 81.59 27 \n",
"13 4 362.43 362.43 0 81.59 27 \n",
"14 5 364.4 364.4 0 101.82 27 \n",
"15 5 408.45 408.45 0 111.05 30 \n",
"16 5 387.44 387.44 0 114.29 28 \n",
"17 6 394.43 394.43 1 122.05 29 \n",
"18 5 404.47 404.47 0 101.82 30 \n",
"19 5 379.42 379.42 0 114.71 28 \n",
"20 5 392.46 392.46 0 101.82 29 \n",
"21 5 378.43 378.43 0 101.82 28 \n",
"22 5 457.32 457.32 0 101.82 29 \n",
"23 5 412.87 412.87 0 101.82 29 \n",
"24 0 366.42 366.42 0 54.21 27 \n",
"25 0 366.42 366.42 0 54.21 27 \n",
"26 0 330.38 330.38 0 53.99 24 \n",
"27 0 323.39 323.39 0 32.62 24 \n",
"28 2 332.51 332.51 0 40.46 23 \n",
"29 2 314.47 314.47 0 40.46 23 \n",
"30 2 398.28 398.28 0 40.46 21 \n",
"31 2 326.48 326.48 1 40.46 24 \n",
"32 2 330.47 330.47 0 49.69 24 \n",
"33 2 312.45 312.45 0 40.46 23 \n",
"34 2 330.47 330.47 0 49.69 24 \n",
"35 2 316.44 316.44 0 49.69 23 \n",
"36 3 315.46 315.46 0 52.49 23 \n",
"37 2 326.48 326.48 1 40.46 24 \n",
"38 2 298.43 298.43 0 40.46 22 \n",
"39 0 373.45 373.45 1 32.62 28 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"###########################\n",
"# Select only relevant columns: \n",
"###########################\n",
"mol_df = mol_df[[ 'molecule_chembl_id','pref_name', 'molecule_hierarchy'\n",
" , 'molecule_properties', 'max_phase']]\n",
"\n",
"###########################\n",
"#And convert cells containing a dictionary to individual columns in the dataframe so that is it easier to filter!\n",
"###########################\n",
"# Molecule hierarchy: \n",
"mol_df['parent_chembl_id'] = mol_df['molecule_hierarchy'].apply(lambda x: x['parent_chembl_id'])\n",
"\n",
"#Physicochemical properties (only report if cells are not null)\n",
"mol_df['acd_logd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['acd_logd'])\n",
"mol_df['acd_logp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['acd_logp'])\n",
"mol_df['acd_most_apka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['acd_most_apka'])\n",
"mol_df['acd_most_bpka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['acd_most_bpka'])\n",
"mol_df['alogp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['alogp'])\n",
"mol_df['hba'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hba'])\n",
"mol_df['hbd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hbd'])\n",
"mol_df['mw_freebase'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['mw_freebase']) #This is the mwt of the parent compound\n",
"mol_df['full_mwt'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['full_mwt']) #This is the mwt of the full compound including any salt\n",
"mol_df['num_ro5_violations'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['num_ro5_violations'])\n",
"mol_df['psa'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['psa'])\n",
"mol_df['heavy_atoms'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['heavy_atoms'])\n",
"\n",
"mol_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter the compound list based on relevant information"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>molecule_chembl_id</th>\n",
" <th>pref_name</th>\n",
" <th>molecule_hierarchy</th>\n",
" <th>molecule_properties</th>\n",
" <th>max_phase</th>\n",
" <th>parent_chembl_id</th>\n",
" <th>acd_logd</th>\n",
" <th>acd_logp</th>\n",
" <th>acd_most_apka</th>\n",
" <th>acd_most_bpka</th>\n",
" <th>alogp</th>\n",
" <th>hba</th>\n",
" <th>hbd</th>\n",
" <th>mw_freebase</th>\n",
" <th>full_mwt</th>\n",
" <th>num_ro5_violations</th>\n",
" <th>psa</th>\n",
" <th>heavy_atoms</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL107</td>\n",
" <td>COLCHICINE</td>\n",
" <td>{'molecule_chembl_id': 'CHEMBL107', 'parent_ch...</td>\n",
" <td>{'acd_logd': '1.07', 'acd_logp': '1.07', 'acd_...</td>\n",
" <td>4</td>\n",
" <td>CHEMBL107</td>\n",
" <td>1.07</td>\n",
" <td>1.07</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2.87</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>399.44</td>\n",
" <td>399.44</td>\n",
" <td>0</td>\n",
" <td>83.09</td>\n",
" <td>29</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" molecule_chembl_id pref_name \\\n",
"1 CHEMBL107 COLCHICINE \n",
"\n",
" molecule_hierarchy \\\n",
"1 {'molecule_chembl_id': 'CHEMBL107', 'parent_ch... \n",
"\n",
" molecule_properties max_phase \\\n",
"1 {'acd_logd': '1.07', 'acd_logp': '1.07', 'acd_... 4 \n",
"\n",
" parent_chembl_id acd_logd acd_logp acd_most_apka acd_most_bpka alogp hba \\\n",
"1 CHEMBL107 1.07 1.07 None None 2.87 6 \n",
"\n",
" hbd mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n",
"1 1 399.44 399.44 0 83.09 29 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"###########################\n",
"#Filter the compounds based on their molecular properties, or max_phase, for example:\n",
"###########################\n",
"\n",
"#Now keep only molecules with max_phase = 4 (ie approved drugs), for example: \n",
"res = mol_df[ mol_df['max_phase'] == 4 ]\n",
"\n",
"# # OR keep only molecules with less than 400 amu, for example: \n",
"# # but first need to convert strings to float for 'full_mwt':\n",
"# mol_df['full_mwt'] = mol_df['full_mwt'].apply(lambda x: float(x) )\n",
"# res = mol_df[ mol_df['full_mwt'] < 400 ]\n",
"\n",
"#Display only top few rows:\n",
"res.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment