Skip to content

Instantly share code, notes, and snippets.

@PatWalters
Created November 23, 2023 01:55
Show Gist options
  • Save PatWalters/c64d59c262ba1993d20af815cd5e0b5a to your computer and use it in GitHub Desktop.
Save PatWalters/c64d59c262ba1993d20af815cd5e0b5a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "a34531cd",
"metadata": {},
"outputs": [],
"source": [
"from astartes.molecules import train_test_split_molecules\n",
"from astartes.utils.warnings import ImperfectSplittingWarning\n",
"from astartes import samplers\n",
"import useful_rdkit_utils as uru\n",
"from lightgbm import LGBMRegressor\n",
"import numpy as np\n",
"from sklearn.metrics import r2_score\n",
"import warnings\n",
"from tqdm.auto import tqdm\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"from rdkit.Chem.Scaffolds import MurckoScaffold"
]
},
{
"cell_type": "markdown",
"id": "f3482017",
"metadata": {},
"source": [
"Get the samplers from astartes"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c411d228",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('dbscan', 'scaffold', 'kmeans', 'optisim', 'sphere_exclusion', 'time_based')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"samplers.IMPLEMENTED_EXTRAPOLATION_SAMPLERS"
]
},
{
"cell_type": "markdown",
"id": "b250624a",
"metadata": {},
"source": [
"Grab some data and add fingerprints"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f3d315ee",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"https://raw.githubusercontent.com/PatWalters/yamc/main/data/B-raf.smi\",\n",
" names=[\"SMILES\",\"Name\",\"pIC50\"],\n",
" sep=\" \")\n",
"df['numpy_fp'] = df.SMILES.apply(uru.smi2numpy_fp)"
]
},
{
"cell_type": "markdown",
"id": "7f8eadcd",
"metadata": {},
"source": [
"Create a list of samplers and hyperparameters. The hyperparameters are just quick estimates based on some empirical tests"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d9d570b7",
"metadata": {},
"outputs": [],
"source": [
"param_list = [['random',{}],\n",
" ['scaffold',{}],\n",
" ['dbscan', dict(eps=6.0)],\n",
" ['kmeans',dict(n_clusters=100)],\n",
" ['optisim',dict(n_clusters=100)],\n",
" ['sphere_exclusion',{}]]"
]
},
{
"cell_type": "markdown",
"id": "0b65480c",
"metadata": {},
"source": [
"The astartes package warns when the training and test set sizes are not exactly what you asked for. This will happend with methods that use clusters to generate splits. I know this will happen and don't want to see the warnings. "
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ce225a16",
"metadata": {},
"outputs": [],
"source": [
"warnings.filterwarnings(\"ignore\", category=ImperfectSplittingWarning)"
]
},
{
"cell_type": "markdown",
"id": "bd6bba70",
"metadata": {},
"source": [
"Run 10 folds of cross validation with each of the splits"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5bee0345",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1505821f17ef460c88f3d413c9bd4067",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/6 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e32702ce5a3a4c769846ad7558603579",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dc19a3c79a284705a1a7e89559d5ae4c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d101f4dd8c7a4256b84fc1cc0a22db0d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "89589b095b07414899029fc44734c8ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eb7457a99aaa4b69bc68a5489a65d072",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "313e1751f54748488ec8105cf1886fbb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"output = []\n",
"for sampler, hopts in tqdm(param_list):\n",
" for i in tqdm(range(0,10),leave=True):\n",
" res = train_test_split_molecules(molecules=df.SMILES.values, return_indices=True, sampler=sampler,\n",
" random_state=i, hopts=hopts)\n",
" train_idx, test_idx = res[-2:]\n",
" train = df.iloc[train_idx]\n",
" test = df.iloc[test_idx]\n",
" lgbm = LGBMRegressor()\n",
" lgbm.fit(np.stack(train.numpy_fp),train.pIC50)\n",
" pred = lgbm.predict(np.stack(test.numpy_fp))\n",
" output.append([sampler, len(train), len(test), r2_score(test.pIC50, pred)])"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "7dc70551",
"metadata": {},
"outputs": [],
"source": [
"output_df = pd.DataFrame(output,columns=['sampler','n_train','n_test','r2'])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "1f94d823",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='sampler', ylabel='r2'>"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIRCAYAAABAnYxxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKLklEQVR4nO3deVxV1f7/8fdhOKAhmCZiiqkZIg5oouJcGt1mzeyWRWoqWberJYlDWalfS/06UGiUlWY2aJPZcLndrG5l96rZ6IBoOCAqiKWBA5Oe9fvDH+frCTU2Alvk9Xw8eABrr7X355yzz/Bmr71xGGOMAAAAAABl5mV3AQAAAABQ3RCkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEU+dhdgtx9//FHGGPn6+tpdCgAAAAAbFRcXy+FwqGPHjn/at8YHKWOM+J/EAAAAAKzkghofpEqORLVr187mSgAAAADYaePGjWXuyzlSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACyyPUi5XC4lJSWpV69eioyM1PDhw5WRkXHG/gcOHFB8fLy6du2qrl276qGHHlJ2dnYVVgwAAACgprM9SCUnJ2v58uWaPn263nrrLTkcDsXFxamoqOi0/ceOHausrCy98soreuWVV5Sdna2//e1vVVw1AAAAgJrM1iBVVFSkxYsXa/To0erTp4/Cw8OVmJio/fv3a9WqVaX65+Xlaf369YqLi1NERIQiIiJ03333afPmzTp06JANtwAAAABATWRrkEpLS9PRo0cVHR3tbgsMDFRERITWr19fqr+fn59q166tlStX6siRIzpy5Ig++OADNWvWTEFBQVVZOgAAAIAazMfOjZec29SoUSOP9uDgYGVlZZXq7+fnp6eeekrTpk1TVFSUHA6HGjRooNdff11eXuXPhMYYHTt2rNzjAQAAAFR/xhg5HI4y9bU1SOXn50uSnE6nR7ufn59yc3NL9TfGaOvWrerYsaNGjhypEydOKDExUQ8++KCWLVumgICActVRXFysLVu2lGssAAAAgAvHH7PJmdgapPz9/SWdPFeq5GdJKiwsVK1atUr1/8c//qE333xT//73v92h6YUXXtDVV1+t9957T0OHDi1XHb6+vmrZsmW5xgIAAAC4MKSnp5e5r61BqmRKX05Ojpo2bepuz8nJUXh4eKn+33//vZo3b+5x5CkoKEjNmzfXrl27yl2Hw+FQ7dq1yz0eAAAAQPVX1ml9ks1BKjw8XAEBAVq3bp07SOXl5Sk1NVWxsbGl+jdq1EgpKSkqLCyUn5+fpJPTA/fs2aObb765SmuvKtnZ2Tpy5IjdZVSIgIAAhYSE2F0GAAAAcM5sDVJOp1OxsbGaM2eO6tWrp8aNG2v27NkKCQlRTEyMTpw4oYMHD6pOnTry9/fXgAEDtGjRIj388MN66KGHJEnPPPOMnE6nBg4caOdNqRS5ubkaNWqUXC6X3aVUCC8vLy1dupQrLAIAAKDaszVISdKYMWN0/PhxTZ48WQUFBercubMWLVokp9OpPXv2qF+/fpoxY4YGDhyo4OBgvfnmm5o9e7aGDh0qLy8vRUVFadmyZQoMDLT7plS4oKAgLVy4sNKPSGVmZmrevHmKj49XaGhopW0nICCAEAUAAIALgsMYY+wuwk4bN26UJLVr187mSuyTnp6usWPHKjExkYtuAAAAoMaykg1s/Ye8AAAAAFAdEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAi3zsLqC6ysnJUV5ent1lVIjMzEyP79VdYGCggoOD7S4DAAAAFzCCVDnk5OTo/gceUHFRkd2lVKh58+bZXUKF8HU69cLzzxOmAAAAUGkIUuWQl5en4qIi+V8aLS9noN3l4BSuojwV7FurvLw8ghQAAAAqDUHqHHg5A+Vdq57dZQAAAACoYlxsAgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGAR/5D3HLgK8+wuAX/AYwIAAICqQJA6BwVZa+0uAQAAAIANCFLnwL9RtLz8Au0uA6dwFeYRcAEAAFDpCFLnwMsvUN616tldBgAAAIAqxsUmAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwyPYg5XK5lJSUpF69eikyMlLDhw9XRkbGafvOnz9frVq1Ou3XpEmTqrhyAAAAADWV7UEqOTlZy5cv1/Tp0/XWW2/J4XAoLi5ORUVFpfoOHz5c33zzjcfXww8/LH9/fw0dOtSG6gEAAADURLYGqaKiIi1evFijR49Wnz59FB4ersTERO3fv1+rVq0q1f+iiy5SgwYN3F/5+flauHChJk6cqPDwcBtuAQAAAICayNYglZaWpqNHjyo6OtrdFhgYqIiICK1fv/5Px8+cOVNXXHGF7rjjjsosEwAAAAA8+Ni58ezsbElSo0aNPNqDg4OVlZV11rEbN27U559/rldffVVeXueWB40xOnbsWJn7FxQUnNP2UPkKCgosPaYAAACAMUYOh6NMfW0NUvn5+ZIkp9Pp0e7n56fc3Nyzjl2yZIkiIyM9jmaVV3FxsbZs2VLm/vv27ZMkuYryznnbqFglj8nOnTtVWFhoczUAAACobv6YTc7E1iDl7+8v6eS5UiU/S1JhYaFq1ap1xnHHjh3TqlWr9OSTT1ZIHb6+vmrZsmWZ+zdo0EC+vk4V7FtbIdtHxfL1dSoyMlKXXHKJ3aUAAACgGklPTy9zX1uDVMmUvpycHDVt2tTdnpOTc9aLR6xevVoul0sxMTEVUofD4VDt2rXL3L9p06Z64YXnlZd3YRyRyszM1Lx58xQfH6/Q0FC7yzlngYGBCg4OtrsMAAAAVDNlndYn2RykwsPDFRAQoHXr1rmDVF5enlJTUxUbG3vGcd9//73atGmjwMDAqiq1lODg4Avuw3poaKilI3MAAABATWVrkHI6nYqNjdWcOXNUr149NW7cWLNnz1ZISIhiYmJ04sQJHTx4UHXq1PGY+peWlqawsDAbKwcAAABQk9n+D3nHjBmjQYMGafLkyRo8eLC8vb21aNEiOZ1OZWVlqWfPnkpJSfEY8+uvv6pu3br2FAwAAACgxrP1iJQkeXt7KyEhQQkJCaWWNWnSRFu3bi3V/sdgBQAAAABVyfYjUgAAAABQ3RCkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFPnYXgLPLzs7WkSNHKnUbmZmZHt8rS0BAgEJCQip1GwAAAEBVIEidx3JzczVq1Ci5XK4q2d68efMqdf1eXl5aunSpgoKCKnU7AAAAQGUjSJ3HgoKCtHDhwko/IlVVAgICCFEAAAC4IBCkznNMhQMAAADOP1xsAgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALPKxuwAAAACcXXZ2to4cOWJ3GRUmICBAISEhdpcBnBOCFAAAwHksNzdXo0aNksvlsruUCuPl5aWlS5cqKCjI7lKAciNIAQAAnMeCgoK0cOHCSj8ilZmZqXnz5ik+Pl6hoaGVuq2AgABCFKo9ghQAAMB5riqnwYWGhqply5ZVtj2guiJIAQAAnIOcnBzl5eXZXcY5y8zM9Ph+IQgMDFRwcLDdZeACRZACAAAop5ycHN3/wAMqLiqyu5QKM2/ePLtLqDC+TqdeeP55whQqBUEKAACgnPLy8lRcVCT/S6Pl5Qy0uxycwlWUp4J9a5WXl0eQQqXg/0gBAACcK2N3ASiFxwSVjCNSAAAA56gga63dJQCoYgQpAACAc+TfKFpefkztO5+4CvMIuKhUBCkAAIBz5OUXKO9a9ewuA0AV4hwpAAAAALCIIAUAAAAAFhGkAAAAAMAizpECAOA8l52drSNHjthdRoUJCAhQSEiI3WUAwDmxPUi5XC4tWLBA77zzjvLy8tSpUyc9+eSTuuyyy07bv7i4WElJSVq5cqUOHz6stm3b6rHHHlPr1q2ruHIAACpfbm6uRo0aJZfLZXcpFcbLy0tLly5VUFCQ3aVUGFdRnt0l4A94TFDZbA9SycnJWr58uWbMmKGGDRtq9uzZiouL08cffyyn01mq/5QpU/TFF19oxowZCg0NVWJiouLi4vTPf/5TderUseEWAABQeYKCgrRw4cJKPyKVmZmpefPmKT4+XqGhoZW6rYCAgAsmRAUGBsrX6VTBPi6zfT7ydToVGMhl6VE5bA1SRUVFWrx4sRISEtSnTx9JUmJionr16qVVq1bpxhtv9OifmZmpd999VwsXLtRVV10lSXr66ac1YMAAbdq0Sd26davqmwAAQKWrymlwoaGhatmyZZVtr7oLDg7WC88/r7y86n/0oyrDdFUJDAxUcHCw3WXgAmVrkEpLS9PRo0cVHR3tbgsMDFRERITWr19fKkh98803CgwMVO/evT36f/HFF1VWMwAAwKmCg4MvqA/rhGmgbGwNUtnZ2ZKkRo0aebQHBwcrKyurVP9du3YpNDRUn376qV588UXt379fERERmjhxoi6//PJy12GM0bFjx8o9HgCA6q6goMD9nffEmol9ADiZCxwOR5n62hqk8vPzJanUuVB+fn7Kzc0t1f/IkSPavXu3kpOTNX78eAUGBur555/XXXfdpZSUFNWvX79cdRQXF2vLli3lGgsAwIVg3759kqSdO3eqsLDQ5mpgB/YB4KTTXafhdGwNUv7+/pJOnitV8rMkFRYWqlatWqX6+/r66vDhw0pMTHQfgUpMTFSfPn30/vvva+TIkeWqw9fXl0PYAIAazc/PT5LUvHlztWjRwuZqYAf2AUBKT08vc19bg1TJlL6cnBw1bdrU3Z6Tk6Pw8PBS/UNCQuTj4+Mxjc/f31+hoaHas2dPuetwOByqXbt2uccDAFDdlfxB09/fn/fEGop9AFCZp/VJklcl1vGnwsPDFRAQoHXr1rnb8vLylJqaqqioqFL9o6KidPz4cW3cuNHdVlBQoMzMzDP+3ykAAAAAqGi2HpFyOp2KjY3VnDlzVK9ePTVu3FizZ89WSEiIYmJidOLECR08eFB16tSRv7+/oqKi1L17d02YMEHTpk1T3bp1lZSUJG9vb/Xv39/OmwIAAACgBrH9H/KOGTNGx48f1+TJk1VQUKDOnTtr0aJFcjqd2rNnj/r166cZM2Zo4MCBkqT58+drzpw5+vvf/66CggJdeeWVWrp0qerVq2fzLQEAAKgc2dnZVfJPmU/9XpkCAgKq9P+jAZXBYYwxdhdhp5Jpgu3atbO5EgAA7JOenq6xY8cqMTGRCzCdZ3JzczVkyBC5XC67S6kwXl5eWrp0qYKCguwuBfBgJRvYfkQKAAAAZxYUFKSFCxdW+hGpqhQQEECIQrVHkAIAADjPMQ0OOP/YetU+AAAAAKiOCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgkY/dBQAAAADV2Y4dO5SRkVHp28nMzNSRI0cqfTtVISAgQKGhoZW+ncsuu0wtWrSolHUTpAAAAIBz8NJLL2nTpk12l4HTaNu2rWbMmFEp6yZIAQAAAOcgLi6OI1IWVeURqcpCkAIAAADOQYsWLSpt+hjOX1xsAgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALDI9iDlcrmUlJSkXr16KTIyUsOHD1dGRsYZ+7///vtq1apVqa+zjQEAAACAiuRjdwHJyclavny5ZsyYoYYNG2r27NmKi4vTxx9/LKfTWar/1q1b1aVLF82bN8+jvV69elVVMgAAAIAaztYjUkVFRVq8eLFGjx6tPn36KDw8XImJidq/f79WrVp12jHbtm1TeHi4GjRo4PHl7e1dxdUDAAAAqKlsDVJpaWk6evSooqOj3W2BgYGKiIjQ+vXrTztm69atatmyZVWVCAAAAACl2Dq1Lzs7W5LUqFEjj/bg4GBlZWWV6n/w4EH9+uuvWr9+vV577TX9/vvvioyM1Lhx49S8efNy12GM0bFjx8o9HgBQM/3666/Ky8uzu4wKsXfvXknS9u3bVVBQYHM1FSMwMFCXXHKJ3WUAqEaMMXI4HGXqa2uQys/Pl6RS50L5+fkpNze3VP9t27ZJkry9vTVr1iwdO3ZMycnJuuuuu/TRRx+V+8WyuLhYW7ZsKddYAEDN9Pvvv2vBgud0/Hix3aVUqAULFthdQoXx8fHV3//+oOrWrWt3KQCqkdNdp+F0bA1S/v7+kk6eK1XysyQVFhaqVq1apfpHR0fr22+/VVBQkLvtueee09VXX60VK1bovvvuK1cdvr6+TBcEAFiyY8cOHT9eLP9Lo+XlDLS7HPyBqyhPBfvWqmHDhmrRooXd5QCoJtLT08vc19YgVTKlLycnR02bNnW35+TkKDw8/LRjTg1RklS7dm01adJE+/fvL3cdDodDtWvXLvd4AEDNU/IHQC9noLxrceXY85W/vz/v8QDKrKzT+iSbLzYRHh6ugIAArVu3zt2Wl5en1NRURUVFler/5ptvqmvXrh5zt48cOaJdu3ZxRAkAAABAlbE1SDmdTsXGxmrOnDn6/PPPlZaWprFjxyokJEQxMTE6ceKEDhw44A5OV199tYwxGj9+vH755Rdt3LhRo0ePVr169XTrrbfaeVMAAAAA1CC2BilJGjNmjAYNGqTJkydr8ODB8vb21qJFi+R0OpWVlaWePXsqJSVF0smpgK+++qqOHj2qwYMHa9iwYapTp46WLl3qcY4VAAAAAFQmW8+Rkk5egS8hIUEJCQmlljVp0kRbt271aGvdurUWLVpUVeUBAAAAQCm2H5ECAAAAgOqGIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAi3zsLgAAgOrMVZhndwk4DR4XAJWNIAUAwDkoyFprdwkAABsQpAAAOAf+jaLl5Rdodxn4A1dhHiEXQKUiSAEAcA68/ALlXaue3WUAAKoYF5sAAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWWQpSH374oW6++WZ16NBBf/3rX/XFF1+U6vPzzz+rdevWFVYgAAAAAJxvyhykUlJSNH78eF166aW64447lJubqwcffFCJiYmVWR8AAAAAnHd8ytrx5Zdf1l133aUnnnhCkpSQkKCZM2fqxRdflDFG8fHxlVYkAAAAAJxPyhykdu3apYSEhP8b6OOjyZMny9fXVy+99JLq16+voUOHVkqRAAAAAHA+KfPUvrp16yonJ6dU+4QJE3Tddddp1qxZ+vTTTy0X4HK5lJSUpF69eikyMlLDhw9XRkZGmcZ+9NFHatWqlfbs2WN5uwAAAABQXmUOUt27d1dSUpI2b95catmsWbPUvn17PfLII3r//fctFZCcnKzly5dr+vTpeuutt+RwOBQXF6eioqKzjtu7d6+mTp1qaVsAAAAAUBHKHKTi4+NVp04dDRo0SFOmTPFY5nQ69fLLL6t9+/Zavnx5mTdeVFSkxYsXa/To0erTp4/Cw8OVmJio/fv3a9WqVWcc53K5lJCQoDZt2pR5WwAAAABQUcocpOrVq6d33nlHzz77rNasWaMff/zRY3lAQICWLl2qSZMmqVOnTmVaZ1pamo4eParo6Gh3W2BgoCIiIrR+/fozjnvhhRdUXFysUaNGlbV8AAAAAKgwZb7YhCT5+vrq2muv1aRJk1RQUFBqube3t4YOHVrmi05kZ2dLkho1auTRHhwcrKysrNOO2bBhgxYvXqx3331X+/fvt1L+GRljdOzYsQpZFwCgZjjd+yDOPwUFBbzHAygzY4wcDkeZ+loKUiXatWunr7/+Wt26dSvPcLf8/HxJJ6cGnsrPz0+5ubml+h87dkzjxo3TuHHj1KxZswoLUsXFxdqyZUuFrAsAUDPs27fP7hJQBjt37lRhYaHdZQCoRv6YTc6kXEGqVatWeu211/Svf/1LLVu2VP369T2WOxwOPf3003+6Hn9/f0knz5Uq+VmSCgsLVatWrVL9p0+frmbNmunOO+8sT9ln5Ovrq5YtW1boOgEAFzY/Pz+7S0AZNG/eXC1atLC7DADVRHp6epn7litIrVq1SsHBwe6N/XGDZT0cVjKlLycnR02bNnW35+TkKDw8vFT/9957T06nUx07dpQknThxQpJ000036ZZbbtG0adOs35j/X2/t2rXLNRYAUDOd+gdAnL/8/f15jwdQZmXNMVI5g9QXX3xRnmGlhIeHKyAgQOvWrXMHqby8PKWmpio2NrZU/z/+n6qff/5ZCQkJevHFF3X55ZdXSE0AAAAA8GfKFaQqitPpVGxsrObMmaN69eqpcePGmj17tkJCQhQTE6MTJ07o4MGDqlOnjvz9/XXZZZd5jC+5WMWll15aanohAAAAAFSWMl/+vLKMGTNGgwYN0uTJkzV48GB5e3tr0aJFcjqdysrKUs+ePZWSkmJ3mQAAAADgZusRKenkJdMTEhKUkJBQalmTJk20devWM47t2rXrWZcDAAAAQGWw/YgUAAAAAFQ3BCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFjkY3cBQHW1Y8cOZWRkVPp2MjMzdeTIkUrfTlUJCAhQaGhopW7jsssuU4sWLSp1GwAAoGYjSAHl9NJLL2nTpk12l4HTaNu2rWbMmGF3GQAA4AJGkALKKS4ujiNS5VBVR6QAAAAqE0EKKKcWLVowfQwAAKCG4mITAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgkY/dBQAAUJ25ivLsLgGnweMCoLIRpAAAKIfAwED5Op0q2LfW7lJwBr5OpwIDA+0uA8AFiiAFAEA5BAcH64Xnn1de3oVx5CMzM1Pz5s1TfHy8QkND7S6nQgQGBio4ONjuMgBcoAhSAACUU3Bw8AX3QT00NFQtW7a0uwwAOO9xsQkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAItsD1Iul0tJSUnq1auXIiMjNXz4cGVkZJyx/6ZNmzR06FB17NhR0dHReuKJJy6YS88CAAAAqB5sD1LJyclavny5pk+frrfeeksOh0NxcXEqKioq1TcnJ0f33nuvmjZtqvfff1/Jycn64YcfNGHCBBsqBwAAAFBT2RqkioqKtHjxYo0ePVp9+vRReHi4EhMTtX//fq1atapU/71796pXr1568skn1axZM1155ZW6/fbbtWbNGhuqBwAAAFBT2Rqk0tLSdPToUUVHR7vbAgMDFRERofXr15fq37FjR82bN08+Pif/j3B6erref/999ejRo8pqBgAAAAAfOzeenZ0tSWrUqJFHe3BwsLKyss469i9/+Yt27dqlxo0bKzk5+ZzqMMbo2LFj57QOAACqs4KCAvd33hMB1FTGGDkcjjL1tTVI5efnS5KcTqdHu5+fn3Jzc886ds6cOSooKNCcOXM0ZMgQffDBB7rooovKVUdxcbG2bNlSrrEAAFwI9u3bJ0nauXOnCgsLba4GAOzzx2xyJrYGKX9/f0knz5Uq+VmSCgsLVatWrbOObdeunSRp/vz56tOnj1atWqUBAwaUqw5fX1+1bNmyXGMBALgQ+Pn5SZKaN2+uFi1a2FwNANgjPT29zH1tDVIlU/pycnLUtGlTd3tOTo7Cw8NL9d++fbv27NmjPn36uNuCg4MVFBSk/fv3l7sOh8Oh2rVrl3s8AADVXckfNP39/XlPBFBjlXVan2TzxSbCw8MVEBCgdevWudvy8vKUmpqqqKioUv1Xr16thx56SEeOHHG37d69W4cOHdLll19eJTUDAAAAgK1Byul0KjY2VnPmzNHnn3+utLQ0jR07ViEhIYqJidGJEyd04MAB9wmw/fv3V506dZSQkKBffvlF3333ncaMGaP27dvr6quvtvOmAAAAAKhBbP+HvGPGjNGgQYM0efJkDR48WN7e3lq0aJGcTqeysrLUs2dPpaSkSJIuvvhiLV26VC6XS4MHD9aDDz6oiIgILVq0SN7e3jbfEgAAAAA1ha3nSEmSt7e3EhISlJCQUGpZkyZNtHXrVo+25s2ba+HChVVVHgAAAACUYvsRKQAAAACobghSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAssj1IuVwuJSUlqVevXoqMjNTw4cOVkZFxxv6//PKL7rvvPnXt2lXdunXTmDFjtG/fviqsGAAAAEBNZ3uQSk5O1vLlyzV9+nS99dZbcjgciouLU1FRUam+hw4d0r333quLLrpIr7/+ul566SUdOnRII0eOVGFhoQ3VAwAAAKiJbA1SRUVFWrx4sUaPHq0+ffooPDxciYmJ2r9/v1atWlWq/2effab8/HzNnDlTV1xxhdq2bavZs2dr+/bt+uGHH2y4BQAAAABqIluDVFpamo4eParo6Gh3W2BgoCIiIrR+/fpS/bt166bnnntOfn5+pZbl5uZWaq0AAAAAUMLHzo1nZ2dLkho1auTRHhwcrKysrFL9mzRpoiZNmni0LVy4UH5+furcuXO56zDG6NixY+UeDwBAdVdQUOD+znsigJrKGCOHw1GmvrYGqfz8fEmS0+n0aPfz8yvTEaalS5fqzTff1KRJk1S/fv1y11FcXKwtW7aUezwAANVdyYWbdu7cyXnHAGq0P2aTM7E1SPn7+0s6ea5Uyc+SVFhYqFq1ap1xnDFGzz77rJ5//nmNGjVKw4YNO6c6fH191bJly3NaBwAA1VnJtPnmzZurRYsWNlcDAPZIT08vc19bg1TJlL6cnBw1bdrU3Z6Tk6Pw8PDTjikuLtakSZP08ccfa/z48RoxYsQ51+FwOFS7du1zXg8AANVVyR80/f39eU8EUGOVdVqfZPPFJsLDwxUQEKB169a52/Ly8pSamqqoqKjTjhk/frw++eQTzZ07t0JCFAAAAABYZesRKafTqdjYWM2ZM0f16tVT48aNNXv2bIWEhCgmJkYnTpzQwYMHVadOHfn7+2vFihVKSUnR+PHj1aVLFx04cMC9rpI+AAAAAFDZbP+HvGPGjNGgQYM0efJkDR48WN7e3lq0aJGcTqeysrLUs2dPpaSkSJI+/vhjSdL//u//qmfPnh5fJX0AAAAAoLLZekRKkry9vZWQkKCEhIRSy5o0aaKtW7e6f1+8eHFVlgYAAAAAp2X7ESkAAAAAqG4IUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwyMfuAgAAwNllZ2fryJEjlbqNzMxMj++VKSAgQCEhIZW+HQCoTAQpAADOY7m5uRo1apRcLleVbG/evHmVvg0vLy8tXbpUQUFBlb4tAKgsBCkAAM5jQUFBWrhwYaUfkapKAQEBhCgA1R5BCgCA8xzT4ADg/MPFJgAAAADAIoIUAAAAAFhke5ByuVxKSkpSr169FBkZqeHDhysjI6NM40aMGKH58+dXQZUAAAAA8H9sD1LJyclavny5pk+frrfeeksOh0NxcXEqKio645iCggIlJCTom2++qcJKAQAAAOAkW4NUUVGRFi9erNGjR6tPnz4KDw9XYmKi9u/fr1WrVp12zA8//KBbb71VP//8swIDA6u4YgAAAACwOUilpaXp6NGjio6OdrcFBgYqIiJC69evP+2Y1atXKyYmRitXrlSdOnWqqlQAAAAAcLP18ufZ2dmSpEaNGnm0BwcHKysr67RjHnrooQqvwxijY8eOVfh6AQAAAFQfxhg5HI4y9bU1SOXn50uSnE6nR7ufn59yc3OrrI7i4mJt2bKlyrYHAAAA4Pz0x2xyJrYGKX9/f0knz5Uq+VmSCgsLVatWrSqrw9fXVy1btqyy7QEAAAA4/6Snp5e5r61BqmRKX05Ojpo2bepuz8nJUXh4eJXV4XA4VLt27SrbHgAAAIDzT1mn9Uk2X2wiPDxcAQEBWrdunbstLy9PqampioqKsrEyAAAAADgzW49IOZ1OxcbGas6cOapXr54aN26s2bNnKyQkRDExMTpx4oQOHjyoOnXqeEz9AwAAAAA72f4PeceMGaNBgwZp8uTJGjx4sLy9vbVo0SI5nU5lZWWpZ8+eSklJsbtMAAAAAHBzGGOM3UXYaePGjZKkdu3a2VwJAAAAADtZyQa2H5ECAAAAgOqGIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAs8rG7AACornbs2KGMjIxK305mZqaOHDlS6dupKgEBAQoNDa307Vx22WVq0aJFpW8HAFAzEaQAoJxeeuklbdq0ye4ycAZt27bVjBkz7C4DAHCBIkgBQDnFxcVxRKocqvKIFAAAlYUgBQDl1KJFC6aOAQBQQ3GxCQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFtgcpl8ulpKQk9erVS5GRkRo+fLgyMjLO2P/QoUN65JFH1LlzZ3Xu3FmPP/64jh07VoUVAwAAAKjpbA9SycnJWr58uaZPn6633npLDodDcXFxKioqOm3/MWPGKDMzU0uWLFFSUpL+85//aOrUqVVcNQAAAICazNYgVVRUpMWLF2v06NHq06ePwsPDlZiYqP3792vVqlWl+v/444/69ttvNWPGDLVp00bdunXTtGnT9MEHH2j//v023AIAAAAANZGtQSotLU1Hjx5VdHS0uy0wMFARERFav359qf7fffedGjRooMsvv9zd1qVLFzkcDn3//fdVUjMAAAAA+Ni58ezsbElSo0aNPNqDg4OVlZVVqv/+/ftL9XU6napbt+5p+5eVMYbzrAAAAIAazhgjh8NRpr62Bqn8/HxJJ8PQqfz8/JSbm3va/n/sW9K/sLCw3HUUFxdry5Yt5R4PAAAA4MJwurxxOrYGKX9/f0knz5Uq+VmSCgsLVatWrdP2P91FKAoLC1W7du1y1+Hr66uWLVuWezwAAACA6i89Pb3MfW0NUiXT9HJyctS0aVN3e05OjsLDw0v1DwkJ0WeffebRVlRUpN9//10NGzYsdx0Oh+OcghgAAACA6q+s0/okmy82ER4eroCAAK1bt87dlpeXp9TUVEVFRZXq37lzZ2VnZ3v8n6mSsVdeeWXlFwwAAAAAsvmIlNPpVGxsrObMmaN69eqpcePGmj17tkJCQhQTE6MTJ07o4MGDqlOnjvz9/RUZGakrr7xSY8eO1ZQpU3Ts2DE9+eSTGjBgQLmPSBUXF8sYo40bN1bwrQMAAABQnRQVFZX5qJTt/5B3zJgxGjRokCZPnqzBgwfL29tbixYtktPpVFZWlnr27KmUlBRJJw+1LViwQE2aNNHQoUP18MMPq3fv3poyZUq5t+9wOCwdwgMAAABwYbKSDRzGGFPJ9QAAAADABcX2I1IAAAAAUN0QpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUvDQt29fzZ8/3+4yUIVWr16ta665Rm3bttWsWbO0efNm3XTTTWrbtq0eeuihPx2/Z88etWrVSuvWrTtjn4kTJ+qee+6pyLJxilatWmnFihWaP3+++vbta3c5qAQljzFQUfbt26d//OMf7t+tvP/fc889mjhxYmWVdkGqyfdZRX+2PJ9eD33sLgCAvebOnavQ0FAtWbJEF110kSZPniyHw6GPP/5YAQEBdpcHAKgEEyZMUOPGjXXjjTdKkt599135+fmVaez8+fPl7e1dmeUBZ/TNN9+oTp06dpchiSAF1Hh5eXnq27evmjRp4v49IiJCzZo1s7cwAECVqVevXpn71q1bt/IKAf5EgwYN7C7Bjal91UCrVq2UmJioq6++Wj169NCOHTuUnZ2tcePGqXv37mrTpo369OmjxMREuVwuSdKKFSvUt29fvf/++4qJiVHbtm1122236ccff3Sv9/Dhw5owYYKioqLUrVs3LVmypNS2f/zxRw0ZMkSdOnVS165d9eijjyo3N9e9vG/fvnrttdc0evRoRUZGqnfv3nrnnXf0448/asCAAYqMjNSdd96p3bt3V/r9dCH76quvNHDgQEVGRqpbt26aOHGi+3HIzMzUgw8+6H6Mxo4dq19//VWSVFRUpLlz57qn7nXt2lXx8fE6dOiQpJP71t69e/Xcc8+pVatW6tu3r7799lutXLnSY7reypUrdcstt6h9+/bq27evXnjhBfe+9kfGGCUnJ6t3797q0KGDHnvsMRUWFlbBvVQzZGdn64EHHlDHjh111VVXeUzNKZGcnKzo6Gh16tRJTzzxhI4dO+ZedrZ9STq3/alkmuc///lP3X777WrXrp369eund999t5LvlZrlt99+0w033KAhQ4YoPz9fK1asUExMjFJSUtS3b1+1b99eI0aM0P79+/XUU0+pc+fO6t69uxYuXOixnvfee0/XX3+92rdvr+uvv16vvvqqx/P6+++/17333qtOnTqpbdu2uummm/Txxx+7l0+cOFEJCQmaNWuWunXrpsjISP3tb3/TgQMH3H1WrlypG2+8Ue3atVOvXr301FNPqaioqPLvpBrg999/19SpU9WnTx+1b99egwcP1nfffSfp5BGjwYMHa+HChYqOjlbnzp01adIkHTlyRNLJaWbffvut3n//ffd04FOnX+Xn5+uxxx5Tjx491K5dOw0YMECffvqpe9unTlMr7/5XHZ3p9XPdunVq1aqVPv/8c1177bXq0KGDhg0bpu3bt3uMP3r0qB599FFFRUWpU6dOmjhxosfr8/bt2xUXF6eOHTuqZ8+eeuSRRzyeT/fcc48effRR3X777YqKitLKlSsl/flzuSzOto5FixYpIiJCGzZskCS5XC7dc889GjhwoPv5/J///Ed33nmn+7Pg3LlzdeLEiVLbWbFihVq1auXRVnL/7dmzR5K0a9cujRgxQp06dVLHjh01YsQIbd261d3/j1P7zvYZpdLflwzOe2FhYaZr165mw4YN5scffzTGGHPLLbeYe++916Smpprdu3ebpUuXmrCwMLNq1SpjjDHvvfeeadOmjfnrX/9qfvzxR7Np0yYzaNAgExMTY1wulzHGmOHDh5vrrrvOrF+/3qSmppohQ4aYsLAwk5SUZIwx5ueffzZt2rQxU6dONb/88otZu3atufHGG83AgQPNiRMnjDHGXH311SYyMtK89dZbZvfu3ebxxx83ERER5uabbzZr1qwxGzZsMH379jWjR4+u+jvuAvHbb7+Ztm3bmtdff93s2bPHfPfdd6Zv377m0UcfNXl5eaZnz55m2LBhZsOGDSY1NdXcfvvt5s477zTGGPM///M/5qqrrjJr1641e/bsMf/+979Nly5dzFNPPWWMMSYnJ8f07t3bzJw50+Tk5JgDBw6YO+64wzz00EMmJyfHFBYWmldeecW9/Z07d5oPP/zQREVFmRkzZhhjjMnMzDRhYWFm7dq1xhhjXnjhBdOxY0fz0Ucfme3bt5unn37ahIWFmdjYWHvuwAtIcXGxufHGG80dd9xhNm3aZH744QfTv39/ExYWZt577z2TlJTkvq83b95s1q1bZ6699lpz//33G2POvi8ZY855fyrZF3r37m0+++wzk56ebh577DETHh5udu/ebc+ddoEoeYx/++03c+ONN5p7773X5OfnG2P+7/U+NjbWbNmyxXz33Xemc+fOpkuXLmbmzJlmx44d5tlnnzVhYWFm69atxhhjli9fbjp37mw++ugjs3v3bvPJJ5+YHj16mFmzZhljjMnOzjbt27c3M2fONLt27TLp6elm4sSJpm3btubAgQPGGGMmTJhg2rRpYyZOnGjS09PN119/bbp06WImTZpkjDFmy5Ytpk2bNuaf//yn2bt3r/n6669N586dzXPPPWfDPXhhOX78uLn11lvNTTfdZNasWWPS09PNlClTTJs2bcyGDRtMUlKSadOmjbnzzjvNpk2bzNq1a02/fv3MiBEjjDHGHDp0yP1a/9tvvxljTr6fl7z/z5gxw9x2221m06ZNZvfu3Wbu3LkmIiLCZGZmGmOMiY2NNRMmTDDGlG//q47O9vq5du1aExYWZq6++mrz5ZdfmrS0NDNixAjTvXt3k5eXZ4w5eZ+FhYWZuXPnmoyMDPPZZ5+Zdu3amWeeecYYc/I516VLFzN16lSTnp5uNm7caO677z7Tt29fc/ToUfc6WrVqZT788EOzbds2c/DgwT99LpfFn63jxIkTJjY21tx0002mqKjILFy40HTo0MHs3LnTGGPMTz/9ZMLDw82MGTNMenq6Wb16tenatauZN2+eMcZz33rvvfdMWFiYx/ZL7r+S/evWW281EydONDt37jS//PKLGTlypLnmmmvc/UteD40xZf6MUlnvSwSpaiAsLMw8/fTT7t/z8/PNokWLzJ49ezz69ezZ0yxYsMAY8387ampqqnv5qlWrTFhYmNm/f7/Zvn27CQsLM//973/dyw8cOGDatm3r3tkfeughM3DgQI9tpKWlmbCwMPPll18aY04+OU4NSb/88osJCwszb7/9trtt9uzZ5i9/+cu53g01VmpqqgkLCzNffPGFu23btm1my5YtZvny5SYyMtIcOnTIY9ns2bNNQUGBWblypVm3bp3H+uLj482QIUPcv5/6AmeM5xuky+Uy3bt3NzNnzvRYx9KlS02bNm1MXl6eR5ByuVymR48eJjEx0aN///79CVIV4OuvvzZhYWEmIyPD3Vayf5QEqVM/6BpjzDfffGPCwsLMrl27zrovGWPOeX8q2RdeeeUV9/K8vDwTFhZmPvroo4q8K2qcsLAws3jxYnPzzTebkSNHmoKCAveyktf7Uz+kjhkzxvTu3dv9h7OCggITFhZmPv74Y2OMMb179zYvv/yyxzbeffdd065dO1NQUGB2795tXnzxRfcfzYwxZufOnSYsLMysX7/eGHMySHXt2tUUFRW5+zz11FPm2muvNcacfM9p27at2bhxo3v5hg0bzI4dOyrqbqmxvvzyy1KPucvlMrfeeqt56KGH3K8F2dnZ7uVfffWVCQsLM9u3bzfGeL7WG+P5XvDAAw+YoUOHukPA8ePHzVdffeURCk4NUlb3v+robK+fJUHgX//6l3vZoUOHTGRkpFm2bJkx5uR9dtttt3ms8/777zcjR440xhiTmJhobrrpJo/lx44dM+3bt3eHhtjYWDNgwACPPn/2XC6Lsqxj7969plOnTmbcuHGmTZs2ZsWKFe6+8fHx5q9//avH+E8//dS8/vrrxhjrQapTp05mzpw5pri42Bhz8o++a9eudb8elbznWfmMUlnvS5wjVU1cdtll7p/9/f0VGxurTz75RK+++qoyMjKUlpamnJycUodyL7/8cvfPJSfmFRcXa9u2bZKkdu3auZdfcsklCg0Ndf++bds29ejRw2N9rVq1UmBgoLZu3ao+ffpIkpo3b+5RmyT3+TaS5Ofnx1SOc9C6dWvddNNNuv/++9WoUSN1795dV111lfr27au3335bzZo185ivfsUVV2jcuHGSpP79+2vNmjWaN2+edu3ape3bt2vHjh2Kiooq07YPHjyoX3/9VZ06dfJo79y5s4qLi7Vjxw7Vr1/f3X7o0CEdOHDAY7+SpA4dOpSa4gDrtm3bpqCgIDVt2tTd1rp1a9WqVcv9e7NmzXTJJZe4f4+MjJQk/fLLL7rmmmvOuC9J0tatWytkfzrT6w7Ozbx581RcXKw2bdqc9qIAp74W16pVS02aNJHD4ZAkd//CwkIdPHhQ2dnZevbZZ7VgwQL3GJfLpcLCQu3Zs0eXX365brvtNr3++utKT0/Xrl27tGXLFknymK5z2WWXydfX1/17nTp13I91r1691LFjR912221q1qyZunfvrn79+qlt27YVeK/UTNu2bVOdOnUUFhbmbnM4HIqKitLq1at1+eWXq1mzZmrYsKF7eceOHd1jW7Rocdb1x8XF6f7771e3bt3UsWNH9ejRQzfeeONZT/Av6/5XXZ3tvfj777+XJHXp0sXdv27dumrWrJn785bkeR9JUlBQkPbu3StJSk1N1fbt292PU4nCwkKP989TPw+W9bl8NmVdx6WXXqpJkybp0Ucf1TXXXKNbb73V3Xfr1q3q3r27x3pjYmLOut2zGTt2rJ5++mktW7ZM0dHR6tWrl66//np5eXmekWTlM0plvS8RpKqJkoAinZy7fPfddys/P1/XX3+9+vfvr8cff1x33313qXFOp7NUmzHG/fMfg5ePj49Hv5IXwVO5XC6PN85Tx5T4486OczN37lw9+OCD+vrrr/Xf//5X8fHxuvLKKxUeHn7ax6jElClTlJKSogEDBuiqq67SAw88oEWLFmn//v1l2u6p+8qpSj5Ine6xP924M/WDdad7TE69f/94Ja2Sx6rkOXumfWnp0qXy8fGpkP3pz153UD7du3fXoEGD9Pe//13XX3+9evfu7bH81Ndl6cyvwyWv+5MmTSr14UeSGjVqpO3bt2vw4MGKiIhQjx491K9fP1188cW6/fbbPfqe7rEu4efnp6VLlyo1NVXffPONvvnmGy1fvlwDBgzQjBkzynSbcXpne38ueT344/5Q8riX5Wp7HTt21FdffaX//Oc/WrNmjd59913Nnz9fL7/8srp163baMWXd/6qzM71+Pvjgg5JKv9e5XC6P++Fs973L5VJ0dLSefPLJUstODbCnfh4sy3P5z1hZx6ZNm+Tj46ONGzcqNzdXQUFBkvSn7x2nc+o+fPz4cY9ld999t6677jp99dVX7j/ezZ8/XytXrvT4Q6GVzyiV9b504e3lNcDq1au1efNmvfbaaxozZoxuuOEGBQQE6LfffivzThERESFJ+uGHH9xteXl5HheFCAsLc5+4WiItLU1Hjhz5079woOL89NNPevrpp9WiRQsNGzZML774op5++mmtW7dOl1xyiXbt2qXDhw+7+6empqpr167au3evli1bpilTpujRRx/VwIED1bp1a+3YsaPM+0n9+vVVv35991/bSnz33Xfy9fX1ODIinbzqU6NGjUr137RpUzlvPU4VERGhvLw8/fLLL+62nTt3ejz+u3btcp9QLp28YIDD4VDLli3Pui/99ttvatmyZaXuTzg3f/nLXxQTE6Mbb7xRjz/+uMfjbEXJ83r37t267LLL3F+bN2/WM888I0latmyZ6tevryVLliguLk59+vRxX3SkrI/3V199pQULFigiIkL33Xefli5dqjFjxiglJaVcdeP/tGrVSnl5eR5HO6STz/eWLVtKKv3aUHKxqdatW//p+pOSkvT999+rX79+mjx5sv71r38pNDRU//rXvyrwVlQvf/b6KUkbN2509z948KAyMjLUpk2bMq3/iiuu0Pbt29WoUSP3czIoKEhPP/10qce5RFmey3+mrOtYvXq1li1bpvnz56tWrVoege/yyy/3uO2StGTJEo+jViVKAvep+2ZGRob7519//VXTpk1TcXGxBg4cqNmzZ+vDDz/UgQMH9O2335629rJ+RqkMBKlqKCQkRJL04Ycfau/evfruu+/0t7/9TcXFxWWeQte0aVNdd911mjZtmv773/9q27ZtGj9+vMf4YcOGKS0tTdOmTdP27dv17bffaty4cYqIiDjjX6RQ8QICAvTmm29q9uzZysjI0NatW/WPf/xDzZo10z333KOgoCAlJCQoLS1NmzZt0pQpUxQWFqaGDRuqTp06+vzzz93jHn/8cW3evLnM+4nD4dDw4cP1+uuv64033lBGRoY++ugjLViwQHfcccdpp3nExcXpjTfe0DvvvKOdO3fqmWeecV/pB+ema9euioyM1Pjx4/XTTz9p48aNmjhxosdfPAsLC/Xwww8rNTVV//nPf/Q///M/GjBggBo3bnzWfeniiy/WzTffXKn7EyrG5MmTVVBQUO6jOg6HQyNHjtRrr72m1157Tbt379Znn32mqVOnyul0yul0KiQkRNnZ2frqq6+0d+9effrpp5oyZYoklfnx9vHx0XPPPaclS5YoMzNTGzdu1L///e9SU5dgXY8ePdSqVSs98sgjWrdunbZv366pU6dq27ZtGjp0qCTp2LFjGj9+vLZt26Y1a9Zo2rRpuuGGG9xT7y+66CLt3btX2dnZpdafkZGhJ598UmvWrNHevXv1ySefaN++fTX6sfuz109Jmjp1qtavX6+0tDSNGzdODRo00HXXXVem9d911106fPiw4uPjtWXLFqWlpemRRx7Rhg0bdMUVV5x2TFmey3+mLOv4/fff3VcL7Nu3r5566il98skn+vDDDyVJI0eO1E8//aRnnnlGO3fu1FdffaWFCxeqX79+pbbXoUMHeXl56ZlnnlFmZqa+/PJLLV682L28bt26+vLLLzV58mRt2bJFmZmZevPNN+Xr61tqWnB5PqNUNObbVEPt27fXpEmTtGTJEj3zzDNq2LChbrjhBjVq1Eg///xzmdcza9Ys/e///q/Gjh0rl8ulO+64QwcPHnQv79ixo1566SU9++yzGjBggAICAnTNNdfokUceKXUIH5WnZcuWmj9/vhYsWKA333xTXl5eio6O1ksvvaRatWpp0aJFmjlzpgYPHiyn06m+fftq/Pjx8vHx0bPPPquZM2e6PyCXXK76hRde0LFjx1S7du0/3f7IkSPldDr16quvasaMGQoJCVFcXJxGjBhx2v533323XC6Xnn/+ef3666/q1auXBg0apJ07d1b0XVPjeHl5aeHChZo+fbqGDx8uf39/jRo1yn3JWElq27atWrdurSFDhsjhcOiGG25wX6b4bPuSl5fXOe9PqBr16tXTpEmTNGHChDJ/SPuj4cOHy8/PT6+99ppmzZql+vXra+DAgRo7dqwkaciQIdqxY4f7D2zNmjVTfHy8kpKStGHDhlLTCk+nR48eeuqpp7R48WIlJibK399fffr0ce+PKD8fHx+98sormjVrlkaPHq2ioiK1adNGS5YsUYcOHbR69Wo1atRIYWFhuuuuu+Tj46Obb77Zfb6jJN15552aMGGCbrnlFq1Zs8Zj/VOnTtWsWbOUkJCg33//XY0bN9a4cePUv3//qr6p542zvX5mZWVJkm6//XaNGzdOeXl5io6O1tKlSz3OYT2b0NBQvf7665o7d67uuusueXt7q0OHDnr11Vc9zkX+oz97LpfFn63jySeflLe3tyZMmCBJioqK0l133aVp06YpKipKrVu3VnJyspKSkvTyyy+rQYMGuueee3T//fef9nZOmzZNL7zwgt5++221adNGjz76qB544AFJJ/ftl156SbNmzdKwYcOUn5+v1q1b68UXXzztESarn1EqmsMwJwMAAOCCMX/+fL3//vv64osv7C6lRli3bp2GDBmizz//3ONiW7jwMbUPAAAAACxiah8AAABwgUlJSdFjjz121j5DhgyxNA0QnpjaBwAAAFxgjh496r7a5pkEBga6L5YB6whSAAAAAGAR50gBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAW7dmzR61atdKKFSvsLgUAYBOCFAAAAABYRJACAAAAAIsIUgCA89LmzZs1dOhQderUSR07dtSwYcP0888/u5e/8847GjhwoDp06KD27durf//+SklJcS9fsWKF2rVrp++//1633Xab2rVrp7/85S/64osvtGPHDg0dOlSRkZGKiYnRP/7xD49xrVq10s8//6xbb71V7du318033+yx7tPZt2+f4uPj1aVLF0VGRmro0KFKTU11Ly+ZDvjKK6/o+uuvV5cuXZgaCADVGEEKAHDeOXLkiEaOHKmLL75YSUlJSkxMVH5+vkaMGKHDhw/rjTfe0BNPPKF+/fpp4cKFmj17tnx9fZWQkKB9+/a513P8+HHFx8frzjvvVHJysvz8/DRu3Djdf//9uuqqq/Tss8+qQYMGmjBhgrKzsz1qGDVqlPr166cFCxaoefPmio+P1+eff37aeg8ePKg777xTmzdv1uOPP665c+fK5XLp7rvv1vbt2z36JiYmasSIEZo+fbqio6Mr/s4DAFQJH7sLAADgj9LT03Xw4EHdc8896tSpkySpRYsWWr58uY4cOaLMzEwNHz5cDz74oHtMkyZNNHDgQP3www+69NJLJUkul0v333+/br/9dklSXl6e4uPjNXToUN17772SpEsuuUS33XabNm3apJCQEPf6YmNj9fe//12S1KtXL916661KTk5Wv379StX76quv6vfff9eyZcvUuHFjSVLv3r11ww036Nlnn1VSUpK777XXXqtBgwZV5N0FALABQQoAcN654oorVK9ePT3wwAO6/vrr1adPH3Xr1k3jx4+XJE2cOFGSdPjwYe3atUu7du3SmjVrJEnFxcUe6+rYsaP750suuUSS1KFDB3db3bp1JZ0MWafq37+/+2eHw6GYmBjNnz9f+fn5pepds2aNWrdurYYNG+r48eOSJC8vL/Xu3VsffvihR9+wsLAy3w8AgPMXQQoAcN656KKL9MYbb+j5559XSkqKli9frlq1aumWW27RY489pv379+uJJ57Q2rVr5ePjoxYtWqhVq1aSJGOMx7oCAgJKrd/f3/9Pa2jYsKHH7/Xr15cxRocPHy7V9/fff1dGRobatGlz2nWdGr5KwhwAoHojSAEAzkstWrTQ7NmzdeLECW3YsEEffPCBli1bpiZNmmjFihXy9fXV22+/rYiICPn4+Cg9Pb3U0Z9zcejQIY8w9euvv8rb21t169ZVTk6OR986deqoS5cu7iNmf+R0OiusLgDA+YGLTQAAzjuffPKJoqOjdeDAAXl7e6tjx46aMmWKAgMDtW/fPu3cuVODBg1S+/bt5eNz8m+CX3/9taST50VVhC+++ML9szFGn376qTp16nTaUNSlSxft3LlTzZs3V7t27dxfH374od555x15e3tXSE0AgPMHR6QAAOedK6+8Ui6XSw8++KDuu+8+XXTRRfrnP/+pw4cP67rrrtPXX3+tN954QyEhIQoMDNQ333yjV199VZJOew5TecyePVtFRUVq3ry53nnnHW3fvt29jT8aNmyYPvjgAw0bNkzDhw/XxRdfrJSUFL399tuaNGlShdQDADi/cEQKAHDeCQ4O1ssvv6w6deroscce06hRo7R582bNnz9f0dHRSk5OVsOGDTVx4kQ9/PDD+umnn/T888+rRYsW+u677yqkhilTpujtt9/W3//+dx04cECLFy9WVFTUafs2bNhQy5cvV+PGjTVlyhTdf//92rBhg5566ikNGzasQuoBAJxfHOaPZ+UCAFCDrVixQpMmTdLnn3+uJk2a2F0OAOA8xREpAAAAALCIIAUAAAAAFjG1DwAAAAAs4ogUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwKL/B9AtM5AsILm5AAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.set(rc={'figure.figsize': (10, 6)})\n",
"sns.set_style('whitegrid')\n",
"sns.set_context('notebook')\n",
"sns.boxplot(x='sampler',y='r2',data=output_df)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "56d9c7d3",
"metadata": {},
"outputs": [],
"source": [
"df['murcko'] = df.SMILES.apply(MurckoScaffold.MurckoScaffoldSmiles)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "85a3614c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"murcko\n",
"c1ccc(Nc2ncnc3ccc(-c4ccccc4)cc23)cc1 58\n",
"c1ccc(-c2ccc3ncnc(Nc4cccc5[nH]ncc45)c3c2)cc1 40\n",
"c1ccc(-c2[nH]c(C3CC3)nc2-c2ccncn2)cc1 27\n",
"O=C(Nc1ccc2c(c1)OCO2)c1ccccc1NCc1ccncc1 19\n",
"c1ccc(-c2ccc3nccc(Nc4cccc5[nH]ncc45)c3c2)cc1 16\n",
"c1ccc(Nc2ncnc3[nH]ccc23)cc1 15\n",
"O=c1oc2ccccc2c(=O)n1Cc1ccccc1 13\n",
"N=C1CCc2cc(-c3c[nH]nc3-c3ccncc3)ccc21 13\n",
"c1ccc(-c2c(-c3ccncc3)nn3c(C4CC5CCC(C4)N5)ccnc23)cc1 12\n",
"c1ccc(Nc2ncnc3ccccc23)cc1 12\n",
"c1ccc2c(c1)[nH]c1cnccc12 12\n",
"c1ccc(Nc2ncccc2-c2ncnc3[nH]cnc23)cc1 12\n",
"c1ccc(Nc2cncc(-c3ccccc3)n2)cc1 12\n",
"c1cc(-c2c(-c3ccncc3)nn3c(C4CC5CCC(C4)N5)ccnc23)c2cn[nH]c2c1 12\n",
"O=C(Nc1ccon1)c1cccc(Nc2ncnc3cncnc23)c1 12\n",
"c1ccc(Nc2nc3cc(Oc4ccncc4)ccc3[nH]2)cc1 10\n",
"O=C(Nc1cccc(Nc2ccnc(NC(=O)C3CC3)c2)c1)c1ccccc1 9\n",
"O=C(Nc1ccccc1)c1cccc(Nc2ncccc2-c2ncnc3[nH]cnc23)c1 9\n",
"c1ccc(-c2ncnc3sccc23)cc1 9\n",
"c1ccc(-c2nc(NC3CC3)nc3sccc23)cc1 8\n",
"c1ccc(Nc2ncnc3cnc(N4CCNCC4)nc23)cc1 8\n",
"O=C(Nc1cccc(-c2ccnc3ccnn23)c1)c1ccccc1 8\n",
"O=C(Nc1cccc(Nc2ncnc3cncnc23)c1)c1ccccc1 8\n",
"O=C(Nc1ccccc1)Nc1cccc(-c2n[nH]cc2-c2ccnc3[nH]ccc23)c1 8\n",
"c1ccc(Nc2ncnc3[nH]cc(-c4ccccc4)c23)cc1 7\n",
"c1ccc(Nc2ncnc3[nH]c(-c4ccccc4)cc23)cc1 7\n",
"O=C(Nc1cccc(Nc2ncccc2-c2ncnc3[nH]cnc23)c1)c1ccccc1 6\n",
"c1cc(-c2nn(C3CCNCC3)cc2-c2ccc3c(c2)Cc2c[nH]nc2-3)ccn1 6\n",
"c1ccc(-c2c(-c3ccncc3)nn3c(-c4ccc(N5CC6CC5CN6)cc4)ccnc23)cc1 6\n",
"O=C(Nc1ccn[nH]1)c1cccc(Nc2ncnc3cncnc23)c1 6\n",
"Name: count, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.murcko.value_counts().head(30)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cee79cf",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment