hiromis/app_examples.ipynb

## app_examples.ipynb
{
  "cells": [
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Examples of many applications"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "This notebook is a quick(ish) test of most of the main application people use, taken from `fastbook`."
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "#hide\nfrom fastai.text.all import *",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Proposed change"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "class ColReader(DisplayedTransform):\n    \"Read `cols` in `row` with potential `pref` and `suff`\"\n    def __init__(self, cols, pref='', suff='', label_delim=None):\n        store_attr()\n        self.pref = str(pref) + os.path.sep if isinstance(pref, Path) else pref\n        self.cols = L(cols)\n\n    def _do_one(self, r, c):\n        o = r[c]if isinstance(c, int) or not c in getattr(r, '_fields', []) else getattr(r, c)\n        if len(self.pref)==0 and len(self.suff)==0 and self.label_delim is None: return o\n        if self.label_delim is None: return f'{self.pref}{o}{self.suff}'\n        else: return o.split(self.label_delim) if len(o)>0 else []\n\n    def __call__(self, o, **kwargs):\n        if len(self.cols) == 1: return self._do_one(o, self.cols[0])\n        return L(self._do_one(o, c) for c in self.cols)",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Text classification"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Original example"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "path = untar_data(URLs.IMDB_SAMPLE)\ndf = pd.read_csv(path/'texts.csv')\nimdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n                      get_x=ColReader('text'), get_y=ColReader('label'), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": ""
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.593497</td>\n      <td>0.624740</td>\n      <td>0.630000</td>\n      <td>00:03</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.446860</td>\n      <td>0.623563</td>\n      <td>0.620000</td>\n      <td>00:07</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>0.397975</td>\n      <td>0.669177</td>\n      <td>0.655000</td>\n      <td>00:07</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.334923</td>\n      <td>0.462760</td>\n      <td>0.790000</td>\n      <td>00:07</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.274499</td>\n      <td>0.503498</td>\n      <td>0.815000</td>\n      <td>00:07</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Should be a bit under 0.8."
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "learn.predict(\"I really liked that movie!\")",
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": ""
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "execution_count": 6,
          "data": {
            "text/plain": "('positive', TensorText(1), TensorText([2.6034e-05, 9.9997e-01]))"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Should be a bit very nearly 1."
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Tricky name example\n\n- `'name'` and `'cat'` was exceptions."
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.head(n=1)",
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": "      label  \\\n0  negative   \n\n                                                                                                                                                                                                                                                                                                                                                                                                                    text  \\\n0  Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!   \n\n   is_valid  \n0     False  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>label</th>\n      <th>text</th>\n      <th>is_valid</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>negative</td>\n      <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n      <td>False</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df['name'] = df['text']\ndf['cat'] = df['label']",
      "execution_count": 8,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.head(n=1)",
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "      label  \\\n0  negative   \n\n                                                                                                                                                                                                                                                                                                                                                                                                                    text  \\\n0  Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!   \n\n   is_valid  \\\n0     False   \n\n                                                                                                                                                                                                                                                                                                                                                                                                                    name  \\\n0  Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!   \n\n        cat  \n0  negative  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>label</th>\n      <th>text</th>\n      <th>is_valid</th>\n      <th>name</th>\n      <th>cat</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>negative</td>\n      <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n      <td>False</td>\n      <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n      <td>negative</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "imdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n                      get_x=ColReader('name'), get_y=ColReader('cat'), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": ""
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.613397</td>\n      <td>0.614988</td>\n      <td>0.690000</td>\n      <td>00:05</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.438083</td>\n      <td>0.545052</td>\n      <td>0.765000</td>\n      <td>00:08</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>0.377312</td>\n      <td>0.586978</td>\n      <td>0.705000</td>\n      <td>00:08</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.306670</td>\n      <td>0.590248</td>\n      <td>0.740000</td>\n      <td>00:08</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.246508</td>\n      <td>0.637076</td>\n      <td>0.755000</td>\n      <td>00:08</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Index example\n\n- Sanity check to make sure select column by index still works"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.columns",
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 12,
          "data": {
            "text/plain": "Index(['label', 'text', 'is_valid', 'name', 'cat'], dtype='object')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "c2i = {v:i for i, v in enumerate(df.columns)}",
      "execution_count": 13,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "imdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n                      get_x=ColReader(c2i['name']), get_y=ColReader(c2i['cat']), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": ""
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.621031</td>\n      <td>0.344544</td>\n      <td>1.000000</td>\n      <td>00:01</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<IPython.core.display.HTML object>",
            "text/html": "<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>accuracy</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.247419</td>\n      <td>0.005617</td>\n      <td>1.000000</td>\n      <td>00:01</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>0.157228</td>\n      <td>0.001115</td>\n      <td>1.000000</td>\n      <td>00:01</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.102784</td>\n      <td>0.000048</td>\n      <td>1.000000</td>\n      <td>00:01</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.070852</td>\n      <td>0.000069</td>\n      <td>1.000000</td>\n      <td>00:01</td>\n    </tr>\n  </tbody>\n</table>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## fin -"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "_draft": {
      "nbviewer_url": "https://gist.github.com/0e49abdaa0bd666b2ed69264e169b13e"
    },
    "gist": {
      "id": "0e49abdaa0bd666b2ed69264e169b13e",
      "data": {
        "description": "nbs/examples/app_examples.ipynb",
        "public": true
      }
    },
    "jupytext": {
      "split_at_heading": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3 (ipykernel)",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.10",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}
	{
	"cells": [
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Examples of many applications"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "This notebook is a quick(ish) test of most of the main application people use, taken from `fastbook`."
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "#hide\nfrom fastai.text.all import *",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Proposed change"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "class ColReader(DisplayedTransform):\n \"Read `cols` in `row` with potential `pref` and `suff`\"\n def __init__(self, cols, pref='', suff='', label_delim=None):\n store_attr()\n self.pref = str(pref) + os.path.sep if isinstance(pref, Path) else pref\n self.cols = L(cols)\n\n def _do_one(self, r, c):\n o = r[c]if isinstance(c, int) or not c in getattr(r, '_fields', []) else getattr(r, c)\n if len(self.pref)==0 and len(self.suff)==0 and self.label_delim is None: return o\n if self.label_delim is None: return f'{self.pref}{o}{self.suff}'\n else: return o.split(self.label_delim) if len(o)>0 else []\n\n def __call__(self, o, **kwargs):\n if len(self.cols) == 1: return self._do_one(o, self.cols[0])\n return L(self._do_one(o, c) for c in self.cols)",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Text classification"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Original example"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "path = untar_data(URLs.IMDB_SAMPLE)\ndf = pd.read_csv(path/'texts.csv')\nimdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n get_x=ColReader('text'), get_y=ColReader('label'), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
	"execution_count": 4,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": ""
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
	"execution_count": 5,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.593497</td>\n <td>0.624740</td>\n <td>0.630000</td>\n <td>00:03</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.446860</td>\n <td>0.623563</td>\n <td>0.620000</td>\n <td>00:07</td>\n </tr>\n <tr>\n <td>1</td>\n <td>0.397975</td>\n <td>0.669177</td>\n <td>0.655000</td>\n <td>00:07</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.334923</td>\n <td>0.462760</td>\n <td>0.790000</td>\n <td>00:07</td>\n </tr>\n <tr>\n <td>3</td>\n <td>0.274499</td>\n <td>0.503498</td>\n <td>0.815000</td>\n <td>00:07</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Should be a bit under 0.8."
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "learn.predict(\"I really liked that movie!\")",
	"execution_count": 6,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": ""
	},
	"metadata": {}
	},
	{
	"output_type": "execute_result",
	"execution_count": 6,
	"data": {
	"text/plain": "('positive', TensorText(1), TensorText([2.6034e-05, 9.9997e-01]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Should be a bit very nearly 1."
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Tricky name example\n\n- `'name'` and `'cat'` was exceptions."
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df.head(n=1)",
	"execution_count": 7,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 7,
	"data": {
	"text/plain": " label \\\n0 negative \n\n text \\\n0 Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff! \n\n is_valid \n0 False ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>label</th>\n <th>text</th>\n <th>is_valid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>negative</td>\n <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n <td>False</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df['name'] = df['text']\ndf['cat'] = df['label']",
	"execution_count": 8,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df.head(n=1)",
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 9,
	"data": {
	"text/plain": " label \\\n0 negative \n\n text \\\n0 Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff! \n\n is_valid \\\n0 False \n\n name \\\n0 Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff! \n\n cat \n0 negative ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>label</th>\n <th>text</th>\n <th>is_valid</th>\n <th>name</th>\n <th>cat</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>negative</td>\n <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n <td>False</td>\n <td>Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!</td>\n <td>negative</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "imdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n get_x=ColReader('name'), get_y=ColReader('cat'), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": ""
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.613397</td>\n <td>0.614988</td>\n <td>0.690000</td>\n <td>00:05</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.438083</td>\n <td>0.545052</td>\n <td>0.765000</td>\n <td>00:08</td>\n </tr>\n <tr>\n <td>1</td>\n <td>0.377312</td>\n <td>0.586978</td>\n <td>0.705000</td>\n <td>00:08</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.306670</td>\n <td>0.590248</td>\n <td>0.740000</td>\n <td>00:08</td>\n </tr>\n <tr>\n <td>3</td>\n <td>0.246508</td>\n <td>0.637076</td>\n <td>0.755000</td>\n <td>00:08</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Index example\n\n- Sanity check to make sure select column by index still works"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df.columns",
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 12,
	"data": {
	"text/plain": "Index(['label', 'text', 'is_valid', 'name', 'cat'], dtype='object')"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "c2i = {v:i for i, v in enumerate(df.columns)}",
	"execution_count": 13,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "imdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72), CategoryBlock),\n get_x=ColReader(c2i['name']), get_y=ColReader(c2i['cat']), splitter=ColSplitter())\ndls = imdb_clas.dataloaders(df, bs=64)",
	"execution_count": 14,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": ""
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)\nlearn.fine_tune(4, 1e-2)",
	"execution_count": 15,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.621031</td>\n <td>0.344544</td>\n <td>1.000000</td>\n <td>00:01</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": "<IPython.core.display.HTML object>",
	"text/html": "<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>epoch</th>\n <th>train_loss</th>\n <th>valid_loss</th>\n <th>accuracy</th>\n <th>time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0.247419</td>\n <td>0.005617</td>\n <td>1.000000</td>\n <td>00:01</td>\n </tr>\n <tr>\n <td>1</td>\n <td>0.157228</td>\n <td>0.001115</td>\n <td>1.000000</td>\n <td>00:01</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.102784</td>\n <td>0.000048</td>\n <td>1.000000</td>\n <td>00:01</td>\n </tr>\n <tr>\n <td>3</td>\n <td>0.070852</td>\n <td>0.000069</td>\n <td>1.000000</td>\n <td>00:01</td>\n </tr>\n </tbody>\n</table>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## fin -"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"_draft": {
	"nbviewer_url": "https://gist.github.com/0e49abdaa0bd666b2ed69264e169b13e"
	},
	"gist": {
	"id": "0e49abdaa0bd666b2ed69264e169b13e",
	"data": {
	"description": "nbs/examples/app_examples.ipynb",
	"public": true
	}
	},
	"jupytext": {
	"split_at_heading": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3 (ipykernel)",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.8.10",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}