Skip to content

Instantly share code, notes, and snippets.

@oguiza
Created May 5, 2020 11:47
Show Gist options
  • Save oguiza/4a62a6400c2e2e9e72c523fbea8fb512 to your computer and use it in GitHub Desktop.
Save oguiza/4a62a6400c2e2e9e72c523fbea8fb512 to your computer and use it in GitHub Desktop.
Proba_Metrics.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"colab_type": "text",
"id": "qcMubba3e7e2"
},
"cell_type": "markdown",
"source": "# Import libraries"
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"colab_type": "code",
"id": "JIpVVG_gMx50",
"outputId": "ef43bbf6-d65c-49ef-bac0-f648197e51af",
"trusted": false
},
"cell_type": "code",
"source": "!pip install git+https://github.com/fastai/fastcore.git@master -q\n!pip install git+https://github.com/fastai/fastai2.git@master -q",
"execution_count": 1,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": " Building wheel for fastcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n Building wheel for fastai2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
}
]
},
{
"metadata": {
"colab": {},
"colab_type": "code",
"id": "F-vR9U_vRzbB",
"trusted": false
},
"cell_type": "code",
"source": "from fastai2.vision.all import *\nfrom fastai2.metrics import *\nfrom sklearn import metrics as skm",
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"colab_type": "text",
"id": "eBH9OuqoZLZ3"
},
"cell_type": "markdown",
"source": "# Proposed code"
},
{
"metadata": {
"colab": {},
"colab_type": "code",
"id": "T5p8UL7wU2bx",
"trusted": false
},
"cell_type": "code",
"source": "class AccumMetric(Metric):\n \"Stores predictions and targets on CPU in accumulate to perform final calculations with `func`.\"\n def __init__(self, func, dim_argmax=None, sigmoid=False, softmax=False, proba=False, thresh=None, to_np=False, invert_arg=False,\n flatten=True, **kwargs):\n store_attr(self,'func,dim_argmax,sigmoid,softmax,proba,thresh,flatten')\n self.to_np,self.invert_args,self.kwargs = to_np,invert_arg,kwargs\n\n def reset(self): self.targs,self.preds = [],[]\n\n def accumulate(self, learn):\n pred = learn.pred.argmax(dim=self.dim_argmax) if (self.dim_argmax and not self.proba) else learn.pred\n if self.sigmoid: pred = torch.sigmoid(pred)\n if self.thresh: pred = (pred >= self.thresh)\n if self.softmax: \n pred = F.softmax(pred, dim=-1)\n if learn.dls.c == 2: pred = pred[:, -1]\n targ = learn.y\n pred,targ = to_detach(pred),to_detach(targ)\n if self.flatten: pred,targ = flatten_check(pred,targ)\n self.preds.append(pred)\n self.targs.append(targ)\n\n @property\n def value(self):\n if len(self.preds) == 0: return\n preds,targs = torch.cat(self.preds),torch.cat(self.targs)\n if self.to_np: preds,targs = preds.numpy(),targs.numpy()\n return self.func(targs, preds, **self.kwargs) if self.invert_args else self.func(preds, targs, **self.kwargs)\n\n @property\n def name(self): return self.func.func.__name__ if hasattr(self.func, 'func') else self.func.__name__\n\ndef skm_to_fastai(func, is_class=True, thresh=None, axis=-1, sigmoid=None, softmax=False, proba=False, **kwargs):\n \"Convert `func` from sklearn.metrics to a fastai metric\"\n dim_argmax = axis if is_class and thresh is None else None\n sigmoid = sigmoid if sigmoid is not None else (is_class and thresh is not None)\n return AccumMetric(func, dim_argmax=dim_argmax, sigmoid=sigmoid, softmax=softmax, proba=proba, thresh=thresh,\n to_np=True, invert_arg=True, **kwargs)\n\ndef APScore(axis=-1, average='macro', pos_label=1, sample_weight=None):\n \"Average Precision for single-label classification problems\"\n return skm_to_fastai(skm.average_precision_score, axis=axis, flatten=False, softmax=True, proba=True,\n average=average, pos_label=pos_label, sample_weight=sample_weight)\n \ndef APScoreMultiLabel(axis=-1, average='macro', pos_label=1, sample_weight=None):\n \"Average Precision for multi-label classification problems\"\n return skm_to_fastai(skm.average_precision_score, axis=axis, flatten=False, sigmoid=True, proba=True,\n average=average, pos_label=pos_label, sample_weight=sample_weight)\n \ndef RocAuc(axis=-1, average='macro', sample_weight=None, max_fpr=None):\n \"Area Under the Receiver Operating Characteristic Curve for single-label binary classification problems\"\n return skm_to_fastai(skm.roc_auc_score, axis=axis, flatten=False, softmax=True, proba=True,\n average=average, sample_weight=sample_weight, max_fpr=max_fpr)\n \ndef RocAucMulti(axis=-1, average='macro', sample_weight=None, max_fpr=None, multi_class='ovr', labels=None):\n \"Area Under the Receiver Operating Characteristic Curve for multi-class classification problems\"\n '''multi_class= use either 'ovr' or 'ovo' (default 'ovr')\n 'ovr':\n Computes the AUC of each class against the rest. This treats the multiclass case in the same way as the multilabel case. \n Sensitive to class imbalance even when average == 'macro', because class imbalance affects the composition of each of the ‘rest’ groupings.\n 'ovo':\n Computes the average AUC of all possible pairwise combinations of classes. Insensitive to class imbalance when average == 'macro'.\n '''\n return skm_to_fastai(skm.roc_auc_score, axis=axis, flatten=False, softmax=True, proba=True,\n average=average, sample_weight=sample_weight, max_fpr=max_fpr, multi_class=multi_class, labels=labels)\n \ndef RocAucMultiLabel(axis=-1, average='macro', sample_weight=None, max_fpr=None):\n \"Area Under the Receiver Operating Characteristic Curve for multi-label classification problems\"\n return skm_to_fastai(skm.roc_auc_score, axis=axis, flatten=False, sigmoid=True, proba=True,\n average=average, sample_weight=sample_weight, max_fpr=max_fpr)",
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"colab_type": "text",
"id": "bMBMavMhLa8l"
},
"cell_type": "markdown",
"source": "# Binary:"
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 16
},
"colab_type": "code",
"id": "uajPS5RlxjrI",
"outputId": "bf0b0613-56b3-491f-ef74-d3eda58efd9a",
"trusted": false
},
"cell_type": "code",
"source": "path = untar_data(URLs.MNIST_TINY)\ndls = ImageDataLoaders.from_folder(path)",
"execution_count": 4,
"outputs": [
{
"data": {
"text/html": "",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 137
},
"colab_type": "code",
"id": "IYb6cStePQ9c",
"outputId": "c822cc80-cd5f-4e12-f933-23b8bde6a042",
"trusted": false
},
"cell_type": "code",
"source": "learn = cnn_learner(dls, resnet18, pretrained=False, metrics=[accuracy, APScore(), RocAuc()])\nlearn.fit_one_cycle(3, 0.1)",
"execution_count": 5,
"outputs": [
{
"data": {
"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>average_precision_score</th>\n <th>roc_auc_score</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>1.483519</td>\n <td>665657.562500</td>\n <td>0.505007</td>\n <td>0.505007</td>\n <td>0.500000</td>\n <td>00:02</td>\n </tr>\n <tr>\n <td>1</td>\n <td>0.980657</td>\n <td>7.075970</td>\n <td>0.841202</td>\n <td>0.981670</td>\n <td>0.981382</td>\n <td>00:02</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.634915</td>\n <td>0.189576</td>\n <td>0.975680</td>\n <td>0.999928</td>\n <td>0.999926</td>\n <td>00:02</td>\n </tr>\n </tbody>\n</table>",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 16
},
"colab_type": "code",
"id": "JVldLRaEM8sE",
"outputId": "c76f6996-4556-459f-fa36-c7edbc607ffe",
"trusted": false
},
"cell_type": "code",
"source": "valid_probas, valid_targets, valid_preds = learn.get_preds(dl=dls.valid, with_decoded=True)",
"execution_count": 6,
"outputs": [
{
"data": {
"text/html": "",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"colab_type": "code",
"id": "BaG-LDJwQncp",
"outputId": "f08c6c60-d8f2-4cdb-813a-c8a72d89e2ee",
"trusted": false
},
"cell_type": "code",
"source": "# APScore and RocAuc calculated based on probas\nprint(f'accuracy : {skm.accuracy_score(valid_targets, valid_preds):8.6f}')\nprint(f'avg precision : {skm.average_precision_score(valid_targets, valid_probas[:, 1]):8.6f}')\nprint(f'roc auc : {skm.roc_auc_score(valid_targets, valid_probas[:, 1]):8.6f}')",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "accuracy : 0.975680\navg precision : 0.999928\nroc auc : 0.999926\n"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"colab_type": "code",
"id": "8hgemM8exHj9",
"outputId": "2bd6a99a-1244-4ef2-8275-10d8253bc178",
"trusted": false
},
"cell_type": "code",
"source": "# APScore and RocAuc calculated based on preds - these were wrong!!\nprint(f'accuracy : {skm.accuracy_score(valid_targets, valid_preds):8.6f}')\nprint(f'avg precision : {skm.average_precision_score(valid_targets, valid_preds):8.6f}')\nprint(f'roc auc : {skm.roc_auc_score(valid_targets, valid_preds):8.6f}')",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "accuracy : 0.975680\navg precision : 0.976162\nroc auc : 0.975921\n"
}
]
},
{
"metadata": {
"colab_type": "text",
"id": "OPk9hKfsLf63"
},
"cell_type": "markdown",
"source": "# Multiclass:"
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "2pCXFq9Z-nCR",
"outputId": "12eb93ce-b423-4ac6-8030-eb25a346bef5",
"trusted": false
},
"cell_type": "code",
"source": "bs = 64\npath = untar_data(URLs.PETS); path\nPath.BASE_PATH = path\npath_anno = path/'annotations'\npath_img = path/'images'\nfnames = get_image_files(path_img)\ndls = ImageDataLoaders.from_name_re(\n path, fnames, pat=r'(.+)_\\d+.jpg$', item_tfms=Resize(460), bs=bs,\n batch_tfms=[*aug_transforms(size=224, min_scale=0.75), Normalize.from_stats(*imagenet_stats)])\ndls.c",
"execution_count": 24,
"outputs": [
{
"data": {
"text/plain": "37"
},
"execution_count": 24,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 133
},
"colab_type": "code",
"id": "Y8IWRml8AKh6",
"outputId": "8e7de335-bd63-4301-8f07-89014e25b9b8",
"trusted": false
},
"cell_type": "code",
"source": "learn = cnn_learner(dls, resnet34, metrics=[accuracy, RocAucMulti(), RocAucMulti(multi_class='ovo'), RocAucMulti(multi_class='ovr')]).to_fp16()\nlearn.fit_one_cycle(1)",
"execution_count": 25,
"outputs": [
{
"data": {
"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>roc_auc_score</th>\n <th>roc_auc_score</th>\n <th>roc_auc_score</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>1.268736</td>\n <td>0.371541</td>\n <td>0.887686</td>\n <td>0.997189</td>\n <td>0.997196</td>\n <td>0.997189</td>\n <td>01:15</td>\n </tr>\n </tbody>\n</table>",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py:2854: UserWarning: The default behavior for interpolate/upsample with float scale_factor will change in 1.6.0 to align with other frameworks/libraries, and use scale_factor directly, instead of relying on the computed output size. If you wish to keep the old behavior, please set recompute_scale_factor=True. See the documentation of nn.Upsample for details. \n warnings.warn(\"The default behavior for interpolate/upsample with float scale_factor will change \"\n"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 72
},
"colab_type": "code",
"id": "OR3Og5iLKjTS",
"outputId": "55fbe023-01f0-4dfc-95fc-1bf1496616d8",
"trusted": false
},
"cell_type": "code",
"source": "valid_probas, valid_targets, valid_preds = learn.get_preds(dl=dls.valid, with_decoded=True)",
"execution_count": 26,
"outputs": [
{
"data": {
"text/html": "",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py:2854: UserWarning: The default behavior for interpolate/upsample with float scale_factor will change in 1.6.0 to align with other frameworks/libraries, and use scale_factor directly, instead of relying on the computed output size. If you wish to keep the old behavior, please set recompute_scale_factor=True. See the documentation of nn.Upsample for details. \n warnings.warn(\"The default behavior for interpolate/upsample with float scale_factor will change \"\n"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 52
},
"colab_type": "code",
"id": "Fc7jATbAA6Us",
"outputId": "bf146a28-ced1-43c4-b667-9c14bb9a5cae",
"trusted": false
},
"cell_type": "code",
"source": "print(f'roc auc (\"ovo\") : {skm.roc_auc_score(valid_targets, valid_probas, multi_class=\"ovo\"):8.6f}')\nprint(f'roc auc (\"ovr\" = default) : {skm.roc_auc_score(valid_targets, valid_probas, multi_class=\"ovr\"):8.6f}')",
"execution_count": 27,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "roc auc (\"ovo\") : 0.997196\nroc auc (\"ovr\" = default) : 0.997189\n"
}
]
},
{
"metadata": {
"colab_type": "text",
"id": "dOxwFh48LlRh"
},
"cell_type": "markdown",
"source": "# Multilabel:"
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 16
},
"colab_type": "code",
"id": "FvcXWlcMMLZh",
"outputId": "f1ff006b-a22d-4688-8dcd-10c7dc1bf6d7",
"trusted": false
},
"cell_type": "code",
"source": "path = untar_data(URLs.PASCAL_2007)\ndf = pd.read_csv(path/'train.csv')",
"execution_count": 13,
"outputs": [
{
"data": {
"text/html": "",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {},
"colab_type": "code",
"id": "PxJiPxLJKPgM",
"trusted": false
},
"cell_type": "code",
"source": "def splitter(df):\n train = df.index[~df['is_valid']].tolist()\n valid = df.index[df['is_valid']].tolist()\n return train,valid\n\ndef get_x(r): return path/'train'/r['fname']\ndef get_y(r): return r['labels'].split(' ')\n\ndef accuracy_multi(inp, targ, thresh=0.5, sigmoid=True):\n \"Compute accuracy when `inp` and `targ` are the same size.\"\n if sigmoid: inp = inp.sigmoid()\n return ((inp>thresh)==targ.bool()).float().mean()\n\ndblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),\n splitter=splitter,\n get_x=get_x, \n get_y=get_y,\n item_tfms = RandomResizedCrop(128, min_scale=0.35))\ndls = dblock.dataloaders(df)",
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 77
},
"colab_type": "code",
"id": "lCGzcv60LoE4",
"outputId": "2fd83f8c-dfce-416a-8d69-ee0becd6f14a",
"trusted": false
},
"cell_type": "code",
"source": "learn = cnn_learner(dls, resnet18, metrics=[RocAucMultiLabel(), APScoreMultiLabel()])\nlearn.fit_one_cycle(1)",
"execution_count": 20,
"outputs": [
{
"data": {
"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>roc_auc_score</th>\n <th>average_precision_score</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.933339</td>\n <td>0.712109</td>\n <td>0.820677</td>\n <td>0.393835</td>\n <td>00:31</td>\n </tr>\n </tbody>\n</table>",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 16
},
"colab_type": "code",
"id": "7dUNtO0KN-wg",
"outputId": "44b1af0d-6e05-4695-cb2e-369e1bfa1163",
"trusted": false
},
"cell_type": "code",
"source": "valid_probas, valid_targets, valid_preds = learn.get_preds(dl=dls.valid, with_decoded=True)",
"execution_count": 21,
"outputs": [
{
"data": {
"text/html": "",
"text/plain": "<IPython.core.display.HTML object>"
},
"metadata": {
"tags": []
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "lbkRTw6fM15V",
"outputId": "14a6316c-3eed-4526-c971-728a806700a5",
"trusted": false
},
"cell_type": "code",
"source": "skm.roc_auc_score(valid_targets, valid_probas), skm.average_precision_score(valid_targets, valid_probas)",
"execution_count": 22,
"outputs": [
{
"data": {
"text/plain": "(0.8206773497647168, 0.3938347435213998)"
},
"execution_count": 22,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"colab": {},
"colab_type": "code",
"id": "OzMqlHtQnlhl",
"trusted": false
},
"cell_type": "code",
"source": "",
"execution_count": 0,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "Proba_Metrics.ipynb",
"provenance": []
},
"gist": {
"id": "",
"data": {
"description": "Proba_Metrics.ipynb",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment