Skip to content

Instantly share code, notes, and snippets.

@tyoc213
Last active February 21, 2021 02:16
Show Gist options
  • Save tyoc213/1527e3e26f0d037466077949becf0063 to your computer and use it in GitHub Desktop.
Save tyoc213/1527e3e26f0d037466077949becf0063 to your computer and use it in GitHub Desktop.
1527e3e26f0d037466077949becf0063
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"colab": {
"name": "1527e3e26f0d037466077949becf0063",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/tyoc213/1527e3e26f0d037466077949becf0063/notebook.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BSeGBDqwQfn9",
"outputId": "71d04026-7285-45ec-d48d-67689e1b4df9"
},
"source": [
"# Development builds straight from GitHub; fastcore is listed first because fastai depends on it.\n",
"!pip install git+https://github.com/fastai/fastcore git+https://github.com/fastai/fastai"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting git+https://github.com/fastai/fastcore\n",
" Cloning https://github.com/fastai/fastcore to /tmp/pip-req-build-u9tj6lv5\n",
" Running command git clone -q https://github.com/fastai/fastcore /tmp/pip-req-build-u9tj6lv5\n",
"Requirement already satisfied (use --upgrade to upgrade): fastcore==1.3.20 from git+https://github.com/fastai/fastcore in /usr/local/lib/python3.6/dist-packages\n",
"Requirement already satisfied: pip in /usr/local/lib/python3.6/dist-packages (from fastcore==1.3.20) (19.3.1)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from fastcore==1.3.20) (20.9)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->fastcore==1.3.20) (2.4.7)\n",
"Building wheels for collected packages: fastcore\n",
" Building wheel for fastcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for fastcore: filename=fastcore-1.3.20-cp36-none-any.whl size=52135 sha256=89cde11b817ee7f4340e8b203666ecc0c440f9a3bc6d722bee196fc1788aac0d\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-xqq4a_mv/wheels/8a/2a/23/bc50c8f5e28776b44ac837a01fcfa675724565d4813d8e51c7\n",
"Successfully built fastcore\n",
"Collecting git+https://github.com/fastai/fastai\n",
" Cloning https://github.com/fastai/fastai to /tmp/pip-req-build-x276prwt\n",
" Running command git clone -q https://github.com/fastai/fastai /tmp/pip-req-build-x276prwt\n",
"Requirement already satisfied (use --upgrade to upgrade): fastai==2.2.5 from git+https://github.com/fastai/fastai in /usr/local/lib/python3.6/dist-packages\n",
"Requirement already satisfied: pip in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (19.3.1)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (20.9)\n",
"Requirement already satisfied: fastcore<1.4,>=1.3.8 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (1.3.20)\n",
"Requirement already satisfied: torchvision<0.9,>=0.8 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (0.8.1+cu101)\n",
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (3.2.2)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (1.1.5)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (2.23.0)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (3.13)\n",
"Requirement already satisfied: fastprogress>=0.2.4 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (1.0.0)\n",
"Requirement already satisfied: pillow>6.0.0 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (7.0.0)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (0.22.2.post1)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (1.4.1)\n",
"Requirement already satisfied: spacy<3 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (2.2.4)\n",
"Requirement already satisfied: torch<1.8,>=1.7.0 in /usr/local/lib/python3.6/dist-packages (from fastai==2.2.5) (1.7.0+cu101)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->fastai==2.2.5) (2.4.7)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torchvision<0.9,>=0.8->fastai==2.2.5) (1.19.5)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->fastai==2.2.5) (0.10.0)\n",
"Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->fastai==2.2.5) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->fastai==2.2.5) (1.3.1)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->fastai==2.2.5) (2018.9)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->fastai==2.2.5) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->fastai==2.2.5) (1.25.11)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->fastai==2.2.5) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->fastai==2.2.5) (2020.12.5)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->fastai==2.2.5) (1.0.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (53.0.0)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (4.41.1)\n",
"Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (0.4.1)\n",
"Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (7.4.0)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (0.8.2)\n",
"Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (1.1.3)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (2.0.5)\n",
"Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (1.0.5)\n",
"Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (1.0.0)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (1.0.5)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy<3->fastai==2.2.5) (3.0.5)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch<1.8,>=1.7.0->fastai==2.2.5) (3.7.4.3)\n",
"Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from torch<1.8,>=1.7.0->fastai==2.2.5) (0.8)\n",
"Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch<1.8,>=1.7.0->fastai==2.2.5) (0.16.0)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib->fastai==2.2.5) (1.15.0)\n",
"Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy<3->fastai==2.2.5) (3.4.0)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy<3->fastai==2.2.5) (3.4.0)\n",
"Building wheels for collected packages: fastai\n",
" Building wheel for fastai (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for fastai: filename=fastai-2.2.5-cp36-none-any.whl size=192190 sha256=fc6c627296d172edac3e92aa8ec39afc4956d32dc998e0147b9a75c1e9359fee\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-_prjjef9/wheels/83/30/a0/6fa8a74c9f5a5ab45cdc84e9f9ed56d8a72750e11ebf50a364\n",
"Successfully built fastai\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VZceFtoCQnIv",
"outputId": "2867f249-3b77-4a29-991e-8284d532cdbe"
},
"source": [
"# NOTE: the PyPI project named `blurr` is an unrelated package (it depends on docopt, boto3 and\n",
"# smart-open), not ohmeow's library, so install from the GitHub checkout instead.\n",
"# The directory test keeps the cell re-runnable: git clone aborts if ./blurr already exists.\n",
"![ -d blurr ] || git clone https://github.com/ohmeow/blurr\n",
"!pip install ./blurr\n",
"!pwd"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"fatal: destination path 'blurr' already exists and is not an empty directory.\n",
"Requirement already satisfied: blurr in /usr/local/lib/python3.6/dist-packages (0.4.1)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from blurr) (2.8.1)\n",
"Requirement already satisfied: docopt in /usr/local/lib/python3.6/dist-packages (from blurr) (0.6.2)\n",
"Requirement already satisfied: smart-open in /usr/local/lib/python3.6/dist-packages (from blurr) (4.1.2)\n",
"Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from blurr) (1.17.12)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from blurr) (3.13)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil->blurr) (1.15.0)\n",
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->blurr) (0.3.4)\n",
"Requirement already satisfied: botocore<1.21.0,>=1.20.12 in /usr/local/lib/python3.6/dist-packages (from boto3->blurr) (1.20.12)\n",
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->blurr) (0.10.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.25.4 in /usr/local/lib/python3.6/dist-packages (from botocore<1.21.0,>=1.20.12->boto3->blurr) (1.25.11)\n",
"/content\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CAiDgsmUQdHi",
"outputId": "d9af7e1f-ed2a-4cda-89da-f9944bb22bfc"
},
"source": [
"# Extra dependency that is not pulled in by the fastai install.\n",
"!python -m pip install swifter"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: swifter in /usr/local/lib/python3.6/dist-packages (1.0.7)\n",
"Requirement already satisfied: ipywidgets>=7.0.0cloudpickle>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from swifter) (7.6.3)\n",
"Requirement already satisfied: tqdm>=4.33.0 in /usr/local/lib/python3.6/dist-packages (from swifter) (4.41.1)\n",
"Requirement already satisfied: pandas>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from swifter) (1.1.5)\n",
"Requirement already satisfied: bleach>=3.1.1 in /usr/local/lib/python3.6/dist-packages (from swifter) (3.3.0)\n",
"Requirement already satisfied: parso>0.4.0 in /usr/local/lib/python3.6/dist-packages (from swifter) (0.8.1)\n",
"Requirement already satisfied: modin[ray]>=0.8.1.1 in /usr/local/lib/python3.6/dist-packages (from swifter) (0.8.3)\n",
"Requirement already satisfied: psutil>=5.6.6 in /usr/local/lib/python3.6/dist-packages (from swifter) (5.8.0)\n",
"Requirement already satisfied: dask[dataframe]>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from swifter) (2.12.0)\n",
"Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (3.5.1)\n",
"Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (4.10.1)\n",
"Requirement already satisfied: jupyterlab-widgets>=1.0.0; python_version >= \"3.6\" in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (1.0.0)\n",
"Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.1.2)\n",
"Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (4.3.3)\n",
"Requirement already satisfied: ipython>=4.0.0; python_version >= \"3.3\" in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.5.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=1.0.0->swifter) (2.8.1)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=1.0.0->swifter) (2018.9)\n",
"Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.6/dist-packages (from pandas>=1.0.0->swifter) (1.19.5)\n",
"Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from bleach>=3.1.1->swifter) (1.15.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach>=3.1.1->swifter) (20.9)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach>=3.1.1->swifter) (0.5.1)\n",
"Requirement already satisfied: pyarrow==1.0; extra == \"ray\" in /usr/local/lib/python3.6/dist-packages (from modin[ray]>=0.8.1.1->swifter) (1.0.0)\n",
"Requirement already satisfied: ray>=1.0.0; extra == \"ray\" in /usr/local/lib/python3.6/dist-packages (from modin[ray]>=0.8.1.1->swifter) (1.2.0)\n",
"Requirement already satisfied: fsspec>=0.6.0; extra == \"dataframe\" in /usr/local/lib/python3.6/dist-packages (from dask[dataframe]>=2.10.0->swifter) (0.8.5)\n",
"Requirement already satisfied: toolz>=0.7.3; extra == \"dataframe\" in /usr/local/lib/python3.6/dist-packages (from dask[dataframe]>=2.10.0->swifter) (0.11.1)\n",
"Requirement already satisfied: partd>=0.3.10; extra == \"dataframe\" in /usr/local/lib/python3.6/dist-packages (from dask[dataframe]>=2.10.0->swifter) (1.1.0)\n",
"Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.6/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.3.1)\n",
"Requirement already satisfied: jupyter-client in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.3.5)\n",
"Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.1.1)\n",
"Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.2.0)\n",
"Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (2.6.0)\n",
"Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (4.7.1)\n",
"Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.3.1->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (4.4.2)\n",
"Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.8.1)\n",
"Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (1.0.18)\n",
"Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (4.8.0)\n",
"Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.7.5)\n",
"Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (53.0.0)\n",
"Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (2.6.1)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->bleach>=3.1.1->swifter) (2.4.7)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.13)\n",
"Requirement already satisfied: aiohttp-cors in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.7.0)\n",
"Requirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.32.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (2.23.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.7.3)\n",
"Requirement already satisfied: redis>=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.5.3)\n",
"Requirement already satisfied: opencensus in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.7.12)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (7.1.2)\n",
"Requirement already satisfied: gpustat in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.6.0)\n",
"Requirement already satisfied: aioredis in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.3.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.0.12)\n",
"Requirement already satisfied: colorama in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.4.4)\n",
"Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.0.2)\n",
"Requirement already satisfied: py-spy>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.3.4)\n",
"Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.9.0)\n",
"Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.12.4)\n",
"Requirement already satisfied: colorful in /usr/local/lib/python3.6/dist-packages (from ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.5.4)\n",
"Requirement already satisfied: locket in /usr/local/lib/python3.6/dist-packages (from partd>=0.3.10; extra == \"dataframe\"->dask[dataframe]>=2.10.0->swifter) (0.2.1)\n",
"Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.9.2)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (2.11.3)\n",
"Requirement already satisfied: Send2Trash in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (1.5.0)\n",
"Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (5.6.1)\n",
"Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (22.0.2)\n",
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.2.5)\n",
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != \"win32\"->ipython>=4.0.0; python_version >= \"3.3\"->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.7.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (2020.12.5)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.25.11)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (2.10)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.6.3)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (20.3.0)\n",
"Requirement already satisfied: idna-ssl>=1.0; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.1.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (5.1.0)\n",
"Requirement already satisfied: async-timeout<4.0,>=3.0 in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.0.1)\n",
"Requirement already satisfied: typing-extensions>=3.6.5 in /usr/local/lib/python3.6/dist-packages (from aiohttp->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (3.7.4.3)\n",
"Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.16.0)\n",
"Requirement already satisfied: opencensus-context==0.1.2 in /usr/local/lib/python3.6/dist-packages (from opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.1.2)\n",
"Requirement already satisfied: blessings>=1.6 in /usr/local/lib/python3.6/dist-packages (from gpustat->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.7)\n",
"Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.6/dist-packages (from gpustat->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (7.352.0)\n",
"Requirement already satisfied: hiredis in /usr/local/lib/python3.6/dist-packages (from aioredis->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.1.0)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (1.1.1)\n",
"Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (1.4.3)\n",
"Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.4.4)\n",
"Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.3)\n",
"Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.6.0)\n",
"Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0cloudpickle>=0.2.2->swifter) (0.8.4)\n",
"Requirement already satisfied: google-auth<2.0dev,>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.25.0)\n",
"Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (1.52.0)\n",
"Requirement already satisfied: contextvars; python_version >= \"3.6\" and python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from opencensus-context==0.1.2->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (2.4)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (4.2.1)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.2.8)\n",
"Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (4.7)\n",
"Requirement already satisfied: immutables>=0.9 in /usr/local/lib/python3.6/dist-packages (from contextvars; python_version >= \"3.6\" and python_version < \"3.7\"->opencensus-context==0.1.2->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.15)\n",
"Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0,>=1.0.0->opencensus->ray>=1.0.0; extra == \"ray\"->modin[ray]>=0.8.1.1->swifter) (0.4.8)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VSKDXtyBQdHo",
"outputId": "de620be0-493e-464f-9dc2-82cccb5f4c87"
},
"source": [
"# GoEmotions full dataset: three CSV shards, less than 100MB total.\n",
"# -nc (no-clobber) skips a shard that is already on disk, so re-running this cell neither\n",
"# downloads it again nor leaves goemotions_N.csv.1 copies next to the original.\n",
"!wget -nc -P data/full_dataset/ https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_1.csv\n",
"!wget -nc -P data/full_dataset/ https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_2.csv\n",
"!wget -nc -P data/full_dataset/ https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_3.csv"
],
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"text": [
"--2021-02-21 02:06:48-- https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_1.csv\n",
"Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.2.112, 172.217.15.112, 172.217.12.240, ...\n",
"Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.2.112|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 14174600 (14M) [application/octet-stream]\n",
"Saving to: ‘data/full_dataset/goemotions_1.csv’\n",
"\n",
"goemotions_1.csv 100%[===================>] 13.52M 40.8MB/s in 0.3s \n",
"\n",
"2021-02-21 02:06:48 (40.8 MB/s) - ‘data/full_dataset/goemotions_1.csv’ saved [14174600/14174600]\n",
"\n",
"--2021-02-21 02:06:49-- https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_2.csv\n",
"Resolving storage.googleapis.com (storage.googleapis.com)... 172.253.62.128, 172.217.5.240, 142.250.31.128, ...\n",
"Connecting to storage.googleapis.com (storage.googleapis.com)|172.253.62.128|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 14173154 (14M) [application/octet-stream]\n",
"Saving to: ‘data/full_dataset/goemotions_2.csv’\n",
"\n",
"goemotions_2.csv 100%[===================>] 13.52M --.-KB/s in 0.1s \n",
"\n",
"2021-02-21 02:06:49 (142 MB/s) - ‘data/full_dataset/goemotions_2.csv’ saved [14173154/14173154]\n",
"\n",
"--2021-02-21 02:06:49-- https://storage.googleapis.com/gresearch/goemotions/data/full_dataset/goemotions_3.csv\n",
"Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.2.112, 172.217.7.144, 172.217.164.144, ...\n",
"Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.2.112|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 14395164 (14M) [application/octet-stream]\n",
"Saving to: ‘data/full_dataset/goemotions_3.csv’\n",
"\n",
"goemotions_3.csv 100%[===================>] 13.73M 43.7MB/s in 0.3s \n",
"\n",
"2021-02-21 02:06:49 (43.7 MB/s) - ‘data/full_dataset/goemotions_3.csv’ saved [14395164/14395164]\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "eQYWJ39bQdHq"
},
"source": [
"# Merge the three shards into goemotions.csv with exactly one header row.\n",
"# A plain `cat` would copy the header line of shards 2 and 3 into the middle of the data,\n",
"# where it is later parsed as two bogus records (assumes every shard starts with the same\n",
"# header as shard 1 - TODO confirm for shards 2 and 3).\n",
"# Step 1 writes the header once; step 2 appends each shard from its second line onwards\n",
"# (-q stops tail from printing a file-name banner between files).\n",
"!head -n 1 data/full_dataset/goemotions_1.csv > goemotions.csv\n",
"!tail -q -n +2 data/full_dataset/goemotions_1.csv data/full_dataset/goemotions_2.csv data/full_dataset/goemotions_3.csv >> goemotions.csv"
],
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "DVJXMN9eQdHq"
},
"source": [
"from fastai.text.all import *"
],
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "k7Th-mxZQdHq",
"outputId": "f2be21ac-95d3-424f-d559-6cc3ac14c945"
},
"source": [
"# Path() with no argument is the current working directory, same as Path(\"\").\n",
"path = Path()\n",
"path.ls()"
],
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(#5) [Path('.config'),Path('data'),Path('goemotions.csv'),Path('blurr'),Path('sample_data')]"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mDcxf_xkQdHt",
"outputId": "08ad3afa-786e-4386-af11-0baf17c77bde"
},
"source": [
"# Peek at the header and the first records of the merged file (10 lines is head's default).\n",
"!head -n 10 goemotions.csv"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"text,id,author,subreddit,link_id,parent_id,created_utc,rater_id,example_very_unclear,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,disapproval,disgust,embarrassment,excitement,fear,gratitude,grief,joy,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral\n",
"That game hurt.,eew5j0j,Brdd9,nrl,t3_ajis4z,t1_eew18eq,1548381039.0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0\n",
"\" >sexuality shouldn’t be a grouping category It makes you different from othet ppl so imo it fits the definition of \"\"grouping\"\" \",eemcysk,TheGreen888,unpopularopinion,t3_ai4q37,t3_ai4q37,1548084169.0,37,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n",
"\"You do right, if you don't care then fuck 'em!\",ed2mah1,Labalool,confessions,t3_abru74,t1_ed2m7g7,1546427744.0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1\n",
"Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,t3_ahulml,t3_ahulml,1547965054.0,18,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0\n",
"\"[NAME] was nowhere near them, he was by the Falcon. \",eda6yn6,American_Fascist713,starwarsspeculation,t3_ackt2f,t1_eda65q2,1546668601.0,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1\n",
"\"Right? Considering it’s such an important document, I should know the damned thing backwards and forwards... thanks again for the help!\",eespn2i,ImperialBoss,TrueReddit,t3_aizyuz,t1_eesoak0,1548280208.0,61,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0\n",
"\"He isn't as big, but he's still quite popular. I've heard the same thing about his content. Never watched him much.\",eczuekb,Rallings,cringe,t3_abeksv,t1_eczsmp8,1546320076.0,3,False,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n",
"That's crazy; I went to a super [RELIGION] high school and I think I can remember 2 girls the entire 4 years that became teen moms.,ed5tx8y,Beachy5313,TeenMomOGandTeenMom2,t3_ac10u9,t1_ed5tfyc,1546536158.0,23,False,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n",
"that's adorable asf,ef961hv,RedRails1917,traaaaaaannnnnnnnnns,t3_akz2ia,t3_akz2ia,1548763543.0,73,False,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "U39MhYgfQdHu",
"outputId": "03fa7135-3487-48ff-ca55-a9e941c9aa7b"
},
"source": [
"df = pd.read_csv(path/'goemotions.csv') #, header=0, names=['label', 'text'])\n",
"df"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36) have mixed types.Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
],
"name": "stderr"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>id</th>\n",
" <th>author</th>\n",
" <th>subreddit</th>\n",
" <th>link_id</th>\n",
" <th>parent_id</th>\n",
" <th>created_utc</th>\n",
" <th>rater_id</th>\n",
" <th>example_very_unclear</th>\n",
" <th>admiration</th>\n",
" <th>amusement</th>\n",
" <th>anger</th>\n",
" <th>annoyance</th>\n",
" <th>approval</th>\n",
" <th>caring</th>\n",
" <th>confusion</th>\n",
" <th>curiosity</th>\n",
" <th>desire</th>\n",
" <th>disappointment</th>\n",
" <th>disapproval</th>\n",
" <th>disgust</th>\n",
" <th>embarrassment</th>\n",
" <th>excitement</th>\n",
" <th>fear</th>\n",
" <th>gratitude</th>\n",
" <th>grief</th>\n",
" <th>joy</th>\n",
" <th>love</th>\n",
" <th>nervousness</th>\n",
" <th>optimism</th>\n",
" <th>pride</th>\n",
" <th>realization</th>\n",
" <th>relief</th>\n",
" <th>remorse</th>\n",
" <th>sadness</th>\n",
" <th>surprise</th>\n",
" <th>neutral</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>That game hurt.</td>\n",
" <td>eew5j0j</td>\n",
" <td>Brdd9</td>\n",
" <td>nrl</td>\n",
" <td>t3_ajis4z</td>\n",
" <td>t1_eew18eq</td>\n",
" <td>1.54838e+09</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>&gt;sexuality shouldn’t be a grouping category It makes you different from othet ppl so imo it fits the definition of \"grouping\"</td>\n",
" <td>eemcysk</td>\n",
" <td>TheGreen888</td>\n",
" <td>unpopularopinion</td>\n",
" <td>t3_ai4q37</td>\n",
" <td>t3_ai4q37</td>\n",
" <td>1.54808e+09</td>\n",
" <td>37</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>You do right, if you don't care then fuck 'em!</td>\n",
" <td>ed2mah1</td>\n",
" <td>Labalool</td>\n",
" <td>confessions</td>\n",
" <td>t3_abru74</td>\n",
" <td>t1_ed2m7g7</td>\n",
" <td>1.54643e+09</td>\n",
" <td>37</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Man I love reddit.</td>\n",
" <td>eeibobj</td>\n",
" <td>MrsRobertshaw</td>\n",
" <td>facepalm</td>\n",
" <td>t3_ahulml</td>\n",
" <td>t3_ahulml</td>\n",
" <td>1.54797e+09</td>\n",
" <td>18</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[NAME] was nowhere near them, he was by the Falcon.</td>\n",
" <td>eda6yn6</td>\n",
" <td>American_Fascist713</td>\n",
" <td>starwarsspeculation</td>\n",
" <td>t3_ackt2f</td>\n",
" <td>t1_eda65q2</td>\n",
" <td>1.54667e+09</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211222</th>\n",
" <td>Everyone likes [NAME].</td>\n",
" <td>ee6pagw</td>\n",
" <td>Senshado</td>\n",
" <td>heroesofthestorm</td>\n",
" <td>t3_agjf24</td>\n",
" <td>t3_agjf24</td>\n",
" <td>1.54763e+09</td>\n",
" <td>16</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211223</th>\n",
" <td>Well when you’ve imported about a gazillion of them I or your country it’s gets serious.</td>\n",
" <td>ef28nod</td>\n",
" <td>5inchloser</td>\n",
" <td>nottheonion</td>\n",
" <td>t3_ak26t3</td>\n",
" <td>t3_ak26t3</td>\n",
" <td>1.54855e+09</td>\n",
" <td>15</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211224</th>\n",
" <td>That looks amazing</td>\n",
" <td>ee8hse1</td>\n",
" <td>springt1me</td>\n",
" <td>shittyfoodporn</td>\n",
" <td>t3_agrnqb</td>\n",
" <td>t3_agrnqb</td>\n",
" <td>1.54768e+09</td>\n",
" <td>70</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211225</th>\n",
" <td>The FDA has plenty to criticize. But like here, it's usually criticized horribly off base. It needs to grow some balls and actually enforce things.</td>\n",
" <td>edrhoxh</td>\n",
" <td>enamedata</td>\n",
" <td>medicine</td>\n",
" <td>t3_aejqzd</td>\n",
" <td>t1_edrgdtx</td>\n",
" <td>1.54717e+09</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211226</th>\n",
" <td>Desktop link: ^^/r/HelperBot_ ^^Downvote ^^to ^^remove. ^^Counter: ^^231558</td>\n",
" <td>edze9g4</td>\n",
" <td>HelperBot_</td>\n",
" <td>MorbidReality</td>\n",
" <td>t3_afhw30</td>\n",
" <td>t1_edze91s</td>\n",
" <td>1.5474e+09</td>\n",
" <td>61</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>211227 rows × 37 columns</p>\n",
"</div>"
],
"text/plain": [
" text ... neutral\n",
"0 That game hurt. ... 0\n",
"1 >sexuality shouldn’t be a grouping category It makes you different from othet ppl so imo it fits the definition of \"grouping\" ... 0\n",
"2 You do right, if you don't care then fuck 'em! ... 1\n",
"3 Man I love reddit. ... 0\n",
"4 [NAME] was nowhere near them, he was by the Falcon. ... 1\n",
"... ... ... ...\n",
"211222 Everyone likes [NAME]. ... 0\n",
"211223 Well when you’ve imported about a gazillion of them I or your country it’s gets serious. ... 0\n",
"211224 That looks amazing ... 0\n",
"211225 The FDA has plenty to criticize. But like here, it's usually criticized horribly off base. It needs to grow some balls and actually enforce things. ... 0\n",
"211226 Desktop link: ^^/r/HelperBot_ ^^Downvote ^^to ^^remove. ^^Counter: ^^231558 ... 0\n",
"\n",
"[211227 rows x 37 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 212
},
"id": "HCWaVMUxQdHv",
"outputId": "fb3d4228-db9c-43a8-aba6-0fc114acb9f9"
},
"source": [
"df.describe()"
],
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>id</th>\n",
" <th>author</th>\n",
" <th>subreddit</th>\n",
" <th>link_id</th>\n",
" <th>parent_id</th>\n",
" <th>created_utc</th>\n",
" <th>rater_id</th>\n",
" <th>example_very_unclear</th>\n",
" <th>admiration</th>\n",
" <th>amusement</th>\n",
" <th>anger</th>\n",
" <th>annoyance</th>\n",
" <th>approval</th>\n",
" <th>caring</th>\n",
" <th>confusion</th>\n",
" <th>curiosity</th>\n",
" <th>desire</th>\n",
" <th>disappointment</th>\n",
" <th>disapproval</th>\n",
" <th>disgust</th>\n",
" <th>embarrassment</th>\n",
" <th>excitement</th>\n",
" <th>fear</th>\n",
" <th>gratitude</th>\n",
" <th>grief</th>\n",
" <th>joy</th>\n",
" <th>love</th>\n",
" <th>nervousness</th>\n",
" <th>optimism</th>\n",
" <th>pride</th>\n",
" <th>realization</th>\n",
" <th>relief</th>\n",
" <th>remorse</th>\n",
" <th>sadness</th>\n",
" <th>surprise</th>\n",
" <th>neutral</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>2.112270e+05</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" <td>211227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>57733</td>\n",
" <td>58012</td>\n",
" <td>49179</td>\n",
" <td>484</td>\n",
" <td>44898</td>\n",
" <td>54925</td>\n",
" <td>8.320000e+04</td>\n",
" <td>164</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>Thank you.</td>\n",
" <td>eed0ric</td>\n",
" <td>[deleted]</td>\n",
" <td>cringe</td>\n",
" <td>t3_ae6ejj</td>\n",
" <td>t3_ae6ejj</td>\n",
" <td>1.548463e+09</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>45</td>\n",
" <td>5</td>\n",
" <td>3882</td>\n",
" <td>895</td>\n",
" <td>92</td>\n",
" <td>64</td>\n",
" <td>1.000000e+01</td>\n",
" <td>8873</td>\n",
" <td>175547</td>\n",
" <td>163989</td>\n",
" <td>170706</td>\n",
" <td>171638</td>\n",
" <td>166981</td>\n",
" <td>163578</td>\n",
" <td>173407</td>\n",
" <td>172198</td>\n",
" <td>170309</td>\n",
" <td>175213</td>\n",
" <td>171260</td>\n",
" <td>168757</td>\n",
" <td>174016</td>\n",
" <td>176370</td>\n",
" <td>173722</td>\n",
" <td>175734</td>\n",
" <td>168646</td>\n",
" <td>177906</td>\n",
" <td>171724</td>\n",
" <td>171529</td>\n",
" <td>176907</td>\n",
" <td>171078</td>\n",
" <td>177367</td>\n",
" <td>171068</td>\n",
" <td>177373</td>\n",
" <td>176322</td>\n",
" <td>172719</td>\n",
" <td>173789</td>\n",
" <td>131747</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text id author ... sadness surprise neutral\n",
"count 211227 211227 211227 ... 211227 211227 211227\n",
"unique 57733 58012 49179 ... 5 5 5\n",
"top Thank you. eed0ric [deleted] ... 0 0 0\n",
"freq 45 5 3882 ... 172719 173789 131747\n",
"\n",
"[4 rows x 37 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gQ-SP0Z1QdHw"
},
"source": [
"# TextDataLoaders.from_df??"
],
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b-mqLg_CQdHw",
"outputId": "f92027bd-325c-4ea1-ae92-e8fb5b026b63"
},
"source": [
"df.iloc[0]"
],
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"text That game hurt.\n",
"id eew5j0j\n",
"author Brdd9\n",
"subreddit nrl\n",
"link_id t3_ajis4z\n",
"parent_id t1_eew18eq\n",
"created_utc 1.54838e+09\n",
"rater_id 1\n",
"example_very_unclear False\n",
"admiration 0\n",
"amusement 0\n",
"anger 0\n",
"annoyance 0\n",
"approval 0\n",
"caring 0\n",
"confusion 0\n",
"curiosity 0\n",
"desire 0\n",
"disappointment 0\n",
"disapproval 0\n",
"disgust 0\n",
"embarrassment 0\n",
"excitement 0\n",
"fear 0\n",
"gratitude 0\n",
"grief 0\n",
"joy 0\n",
"love 0\n",
"nervousness 0\n",
"optimism 0\n",
"pride 0\n",
"realization 0\n",
"relief 0\n",
"remorse 0\n",
"sadness 1\n",
"surprise 0\n",
"neutral 0\n",
"Name: 0, dtype: object"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Hc2unk57QdHx"
},
"source": [
"# Names of the 28 emotion label columns, in the same order as the CSV header.\n",
"categories = [\n",
"    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',\n",
"    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',\n",
"    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',\n",
"    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral',\n",
"]"
],
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SEdKPhMDQdHy",
"outputId": "31945e58-624d-4679-8f49-e8d0b960594d"
},
"source": [
"df.iloc[0][list(df.iloc[0][categories]==1).index(True)+9]"
],
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1"
]
},
"metadata": {
"tags": []
},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "DqNAUd4aQdHy",
"outputId": "72bc546a-4fe9-4cdc-ea12-6a51670bbc48"
},
"source": [
"categories[list(df.iloc[0][categories]==1).index(True)]"
],
"execution_count": 28,
"outputs": [
{
"output_type": "execute_result",
"data": {
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
},
"text/plain": [
"'sadness'"
]
},
"metadata": {
"tags": []
},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vv0dThceQdHz"
},
"source": [
"def get_emotion(row):\n",
"    \"\"\"Return the name of the first emotion column flagged in `row`.\n",
"\n",
"    `row` is one record of the dataframe, indexed by column name. The columns\n",
"    listed in `categories` are checked in order and the first one whose value\n",
"    equals 1 wins. Returns 'unknown' when no column is set.\n",
"    \"\"\"\n",
"    # The CSV is read with mixed dtypes, so a flag can be the int 1 or the\n",
"    # string '1'. Coerce to numbers first (non-numeric values become NaN).\n",
"    flags = pd.to_numeric(row[categories], errors='coerce') == 1\n",
"    if not flags.any(): return 'unknown'\n",
"    # idxmax on a boolean Series yields the label of the first True entry.\n",
"    return flags.idxmax()\n",
"\n",
"assert get_emotion(df.iloc[0]) == 'sadness'\n",
"# Same row with every value stringified must give the same answer.\n",
"assert get_emotion(df.iloc[0].astype(str)) == 'sadness'"
],
"execution_count": 29,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "UcdYoAxtQdH0"
},
"source": [
"Create a new `emotion` column by applying `get_emotion` to every row (see https://stackoverflow.com/a/26887820)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ro1OXaRZQdH0"
},
"source": [
"Use all cores with swifter to speed up the row-wise apply (see https://stackoverflow.com/a/51669468)."
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "G87VCv28QdH0",
"outputId": "36fa5d2b-5bfc-4d53-c190-d88cd2efd3b0"
},
"source": [
"%%time\n",
"\n",
"# Importing swifter is what makes the `df.swifter` accessor used below available.\n",
"import swifter\n",
"\n",
"# get_emotion already takes a single row, so it is passed directly (no lambda wrapper).\n",
"df['emotion'] = df.swifter.apply(get_emotion, axis=1)\n",
"df"
],
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"text": [
"CPU times: user 4.26 s, sys: 608 ms, total: 4.87 s\n",
"Wall time: 2min 7s\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jBjAV8AvSShU",
"outputId": "941b00c7-acaf-42c1-aa9d-ebba1216d0a1"
},
"source": [
"# Needed by the `from transformers import *` cell below; pinned to the version\n",
"# this notebook was last run with.\n",
"%pip install transformers==4.3.2"
],
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting transformers\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/98/87/ef312eef26f5cecd8b17ae9654cdd8d1fae1eb6dbd87257d6d73c128a4d0/transformers-4.3.2-py3-none-any.whl (1.8MB)\n",
"\u001b[K |████████████████████████████████| 1.8MB 17.6MB/s \n",
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
"Collecting sacremoses\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
"\u001b[K |████████████████████████████████| 890kB 51.8MB/s \n",
"\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.9)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (1.19.5)\n",
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.8)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
"Collecting tokenizers<0.11,>=0.10.1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/fd/5b/44baae602e0a30bcc53fbdbc60bd940c15e143d252d658dfdefce736ece5/tokenizers-0.10.1-cp36-cp36m-manylinux2010_x86_64.whl (3.2MB)\n",
"\u001b[K |████████████████████████████████| 3.2MB 58.5MB/s \n",
"\u001b[?25hRequirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from transformers) (3.4.0)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.0.0)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.12.5)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.25.11)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.0)\n",
"Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n",
"Building wheels for collected packages: sacremoses\n",
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893261 sha256=2bf5ee0909f35a66ea63def8b9c4c2b021af03024a3683476cc6e424ccd01dee\n",
" Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
"Successfully built sacremoses\n",
"Installing collected packages: sacremoses, tokenizers, transformers\n",
"Successfully installed sacremoses-0.0.43 tokenizers-0.10.1 transformers-4.3.2\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "IQQGOs2TQdH0"
},
"source": [
"import torch\n",
"from transformers import *\n",
"from fastai.text.all import *\n"
],
"execution_count": 34,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TNwSBbwlSiGM"
},
"source": [
"#!pwd\n",
"#!ls\n",
"#!pip install -e blurr"
],
"execution_count": 35,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "CJetih80SuWk"
},
"source": [
"#%cd blurr/\n",
"#!pwd\n",
"#!pip install -e ."
],
"execution_count": 36,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nMvSd22KTqSw"
},
"source": [
"# !pwd\n",
"# !pip install -e \".[dev]\""
],
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "spLJukDLUEUC"
},
"source": [
"#!pip uninstall blurr"
],
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "F-D8rbYNUOI9"
},
"source": [
"# Install blurr (distributed as `ohmeow-blurr`) so the `blurr.data` and\n",
"# `blurr.modeling` imports in the next cell can resolve.\n",
"# NOTE(review): version left unpinned - pin it once the working release is known.\n",
"%pip install ohmeow-blurr"
],
"execution_count": 39,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 341
},
"id": "cCo0ElqISgtT",
"outputId": "a8f79669-8e9a-4315-ebe2-5403adf767d3"
},
"source": [
"\n",
"from blurr.data.all import *\n",
"from blurr.modeling.all import *"
],
"execution_count": 40,
"outputs": [
{
"output_type": "error",
"ename": "ModuleNotFoundError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-40-f1cd5904a267>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mblurr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mall\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mblurr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodeling\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mall\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'blurr.data'",
"",
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VjSPCTSDQdH1"
},
"source": [
"Get your Hugging Face objects (architecture, config, tokenizer and model)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "YJg0CER-QdH1"
},
"source": [
"%%time\n",
"task = HF_TASKS_AUTO.SequenceClassification\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Jp7O9RRlTWmz"
},
"source": [
"%%time\n",
"from transformers import AutoConfig\n",
"\n",
"pretrained_model_name = 'bert-base-uncased'\n",
"\n",
"# The classification head defaults to 2 labels in transformers, but `emotion`\n",
"# holds one class per distinct value, so size the head from the data first.\n",
"# nunique() over the whole frame is an upper bound on the training vocab size.\n",
"hf_config = AutoConfig.from_pretrained(pretrained_model_name)\n",
"hf_config.num_labels = df['emotion'].nunique()\n",
"\n",
"hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name, task=task, config=hf_config)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "wfiFobpmQdH2"
},
"source": [
"Build your data blocks and your DataLoaders"
]
},
{
"cell_type": "code",
"metadata": {
"id": "MRoXBswLQdH2"
},
"source": [
"%%time\n",
"# single input: the `text` column is the model input, `emotion` is the target\n",
"\n",
"blocks = (HF_TextBlock(hf_arch, hf_config, hf_tokenizer, hf_model), CategoryBlock)\n",
"# Seed the splitter so the train/validation partition is the same on every run.\n",
"dblocks = DataBlock(blocks=blocks, get_x=ColReader('text'), get_y=ColReader('emotion'), splitter=RandomSplitter(seed=42))\n",
"\n",
"dls = dblocks.dataloaders(df, bs=16)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "cg-2VSp2QdH2"
},
"source": [
"dls.show_batch(dataloaders=dls, max_n=2)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wWi6rx1DQdH2"
},
"source": [
"%%time\n",
"model = HF_BaseModelWrapper(hf_model)\n",
"\n",
"learn = Learner(dls, model, opt_func=partial(Adam, decouple_wd=True),\n",
" loss_func=CrossEntropyLossFlat(),\n",
" metrics=[accuracy],\n",
" cbs=[HF_BaseModelCallback],\n",
" splitter=hf_splitter)\n",
"\n",
"\n",
"learn.model = learn.model.cuda()\n",
"learn.dls = learn.dls.cuda()\n",
"#learn.create_opt()\n",
"#learn.freeze()\n",
"\n",
"learn.fit_one_cycle(3, lr_max=1e-3)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fVUVzoeuQdH4"
},
"source": [
"from fastai.test_utils import *\n",
"show_install(['blurr'])\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "8G4RRhA8QdH5"
},
"source": [
"get_env('PATH')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ZFzPIS8sQdH5"
},
"source": [
"nvidia_mem()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1nnGHxSESXWk"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment