davidefiocco/azureml-logging-on-transformers.ipynb

## azureml-logging-on-transformers.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "AzureML logging on transformers.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPnN7JLvxCMjoClZ+oKT686",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/davidefiocco/416c382cd51ad58cabf3eb940c040220/azureml-logging-on-transformers.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BxOApI-Kj3SZ",
        "outputId": "82e4725a-2cd1-483f-a0c6-529caa6a5085",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 166
        }
      },
      "source": [
        "# Install the davidefiocco/transformers fork at a fixed commit (not a moving branch).\n",
        "!pip install --quiet git+https://github.com/davidefiocco/transformers.git@c32718170899d1110a77ab116a2a60bbe326829e"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "    Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[K     |████████████████████████████████| 2.9MB 11.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 890kB 49.8MB/s \n",
            "\u001b[K     |████████████████████████████████| 1.1MB 55.7MB/s \n",
            "\u001b[?25h  Building wheel for transformers (PEP 517) ... \u001b[?25l\u001b[?25hdone\n",
            "  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "B2ee9QE2pQnQ",
        "outputId": "17219060-7a40-4976-ebf1-8ffe31f4f92d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 671
        }
      },
      "source": [
        "# azureml-dataset-runtime requires pyarrow>=0.17.0,<2.0.0. Constrain pyarrow explicitly so pip\n",
        "# does not install pyarrow 2.0.0, which makes the azureml run-type providers fail to load.\n",
        "!pip install --quiet datasets azureml-sdk \"pyarrow>=0.17.0,<2.0.0\""
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[K     |████████████████████████████████| 153kB 10.2MB/s \n",
            "\u001b[K     |████████████████████████████████| 245kB 18.1MB/s \n",
            "\u001b[K     |████████████████████████████████| 17.7MB 204kB/s \n",
            "\u001b[K     |████████████████████████████████| 2.0MB 51.9MB/s \n",
            "\u001b[K     |████████████████████████████████| 102kB 13.8MB/s \n",
            "\u001b[K     |████████████████████████████████| 143kB 65.9MB/s \n",
            "\u001b[K     |████████████████████████████████| 40kB 7.9MB/s \n",
            "\u001b[K     |████████████████████████████████| 153kB 67.5MB/s \n",
            "\u001b[K     |████████████████████████████████| 112kB 66.0MB/s \n",
            "\u001b[K     |████████████████████████████████| 972kB 57.5MB/s \n",
            "\u001b[K     |████████████████████████████████| 552kB 49.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 61kB 11.1MB/s \n",
            "\u001b[K     |████████████████████████████████| 727kB 62.7MB/s \n",
            "\u001b[K     |████████████████████████████████| 61kB 11.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 92kB 14.7MB/s \n",
            "\u001b[K     |████████████████████████████████| 102kB 17.0MB/s \n",
            "\u001b[K     |████████████████████████████████| 92kB 15.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 2.6MB 54.4MB/s \n",
            "\u001b[K     |████████████████████████████████| 184kB 60.5MB/s \n",
            "\u001b[K     |████████████████████████████████| 28.2MB 105kB/s \n",
            "\u001b[K     |████████████████████████████████| 8.6MB 55.1MB/s \n",
            "\u001b[K     |████████████████████████████████| 61kB 11.0MB/s \n",
            "\u001b[K     |████████████████████████████████| 307kB 62.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 204kB 64.6MB/s \n",
            "\u001b[K     |████████████████████████████████| 552kB 58.1MB/s \n",
            "\u001b[K     |████████████████████████████████| 51kB 9.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 61kB 11.3MB/s \n",
            "\u001b[K     |████████████████████████████████| 1.3MB 59.6MB/s \n",
            "\u001b[K     |████████████████████████████████| 7.9MB 151kB/s \n",
            "\u001b[K     |████████████████████████████████| 28.7MB 111kB/s \n",
            "\u001b[K     |████████████████████████████████| 92kB 14.4MB/s \n",
            "\u001b[K     |████████████████████████████████| 122kB 67.4MB/s \n",
            "\u001b[K     |████████████████████████████████| 51kB 9.6MB/s \n",
            "\u001b[?25h  Building wheel for fusepy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[31mERROR: azureml-dataset-runtime 1.16.0 has requirement pyarrow<2.0.0,>=0.17.0, but you'll have pyarrow 2.0.0 which is incompatible.\u001b[0m\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ivN47u8ltj5I",
        "outputId": "b0dec49a-c43a-49f8-ef37-151d4e3bc021",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 223
        }
      },
      "source": [
        "# -O makes the cell safe to re-run: wget overwrites download_glue_data.py instead of saving a\n",
        "# second copy as download_glue_data.py.1 while later cells keep executing the stale first one.\n",
        "!wget -O download_glue_data.py https://raw.githubusercontent.com/huggingface/transformers/master/utils/download_glue_data.py"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-10-23 16:01:52--  https://raw.githubusercontent.com/huggingface/transformers/master/utils/download_glue_data.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 8209 (8.0K) [text/plain]\n",
            "Saving to: ‘download_glue_data.py’\n",
            "\n",
            "\rdownload_glue_data.   0%[                    ]       0  --.-KB/s               \rdownload_glue_data. 100%[===================>]   8.02K  --.-KB/s    in 0s      \n",
            "\n",
            "2020-10-23 16:01:52 (124 MB/s) - ‘download_glue_data.py’ saved [8209/8209]\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "emgqvSkyuLbo",
        "outputId": "73c0287c-398a-4fd1-89dc-a9a50e00467e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 223
        }
      },
      "source": [
        "# Fetch run_glue.py from the same commit the transformers fork was installed from, so the\n",
        "# script and the library cannot drift apart when the branch moves. -O overwrites run_glue.py\n",
        "# on re-run instead of saving run_glue.py.1 next to a stale copy.\n",
        "!wget -O run_glue.py https://raw.githubusercontent.com/davidefiocco/transformers/c32718170899d1110a77ab116a2a60bbe326829e/examples/text-classification/run_glue.py"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-10-23 16:01:52--  https://raw.githubusercontent.com/davidefiocco/transformers/azuremllogging/examples/text-classification/run_glue.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 17873 (17K) [text/plain]\n",
            "Saving to: ‘run_glue.py’\n",
            "\n",
            "\rrun_glue.py           0%[                    ]       0  --.-KB/s               \rrun_glue.py         100%[===================>]  17.45K  --.-KB/s    in 0.002s  \n",
            "\n",
            "2020-10-23 16:01:52 (9.74 MB/s) - ‘run_glue.py’ saved [17873/17873]\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cu-ec-RFtoN8",
        "outputId": "091b0e94-5e96-4550-e41b-8f4322bd9ee1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "# Download only the CoLA task. The next cell points --train_file/--validation_file at\n",
        "# ./glue_data/CoLA/, so the target directory is spelled out here.\n",
        "!python download_glue_data.py --data_dir glue_data --tasks CoLA"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Downloading and extracting CoLA...\n",
            "\tCompleted!\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "n6xIp8YDkAt5",
        "outputId": "7981bb3c-7ce2-4826-e325-492c12efc738",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "!python run_glue.py --model_name_or_path bert-base-cased \\\n",
        "                    --task_name CoLA \\\n",
        "                    --do_train \\\n",
        "                    --do_eval \\\n",
        "                    --train_file ./glue_data/CoLA/train.tsv \\\n",
        "                    --validation_file ./glue_data/CoLA/dev.tsv \\\n",
        "                    --max_seq_length 128 \\\n",
        "                    --per_device_train_batch_size 32 \\\n",
        "                    --learning_rate 2e-5 \\\n",
        "                    --num_train_epochs 3.0 \\\n",
        "                    --output_dir output \\\n",
        "                    --evaluation_strategy steps \\\n",
        "                    --logging_steps 8 \\\n",
        "                    --eval_steps 4"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "2020-10-23 16:01:57.526496: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n",
            "Failure while loading azureml_run_type_providers. Failed to load entrypoint automl = azureml.train.automl.run:AutoMLRun._from_run_dto with exception (pyarrow 2.0.0 (/usr/local/lib/python3.6/dist-packages), Requirement.parse('pyarrow<2.0.0,>=0.17.0'), {'azureml-dataset-runtime'}).\n",
            "10/23/2020 16:01:59 - WARNING - __main__ -   Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
            "10/23/2020 16:01:59 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='output', overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=False, evaluate_during_training=False, evaluation_strategy=<EvaluationStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=32, per_device_eval_batch_size=8, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=2e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, warmup_steps=0, logging_dir='runs/Oct23_16-01-59_0e079dfba1a4', logging_first_step=False, logging_steps=8, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, debug=False, dataloader_drop_last=False, eval_steps=4, dataloader_num_workers=0, past_index=-1, run_name='output', disable_tqdm=False, remove_unused_columns=True, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None)\n",
            "10/23/2020 16:01:59 - INFO - filelock -   Lock 140494472776168 acquired on /root/.cache/huggingface/datasets/9ed4f2e133395826175a892c70611f68522c7bc61a35476e8b51a31afb76e4bf.e6f3e3f3e3875a07469d1cfd32e16e1d06b149616b11eef2d081c43d515b492d.py.lock\n",
            "Downloading: 28.7kB [00:00, 26.6MB/s]       \n",
            "10/23/2020 16:01:59 - INFO - filelock -   Lock 140494472776168 released on /root/.cache/huggingface/datasets/9ed4f2e133395826175a892c70611f68522c7bc61a35476e8b51a31afb76e4bf.e6f3e3f3e3875a07469d1cfd32e16e1d06b149616b11eef2d081c43d515b492d.py.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140494472776168 acquired on /root/.cache/huggingface/datasets/acb4af7ec7ef94a60af00a8439cb7d3cc6dde28c763025a3a5e719ff2bfdddf4.082d8848abcb8cddda90647ec069014ca338abd4f45e0a83c6df1ece0d45476a.lock\n",
            "Downloading: 28.7kB [00:00, 29.9MB/s]       \n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140494472776168 released on /root/.cache/huggingface/datasets/acb4af7ec7ef94a60af00a8439cb7d3cc6dde28c763025a3a5e719ff2bfdddf4.082d8848abcb8cddda90647ec069014ca338abd4f45e0a83c6df1ece0d45476a.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492048227184 acquired on /root/.cache/huggingface/datasets/9ed4f2e133395826175a892c70611f68522c7bc61a35476e8b51a31afb76e4bf.e6f3e3f3e3875a07469d1cfd32e16e1d06b149616b11eef2d081c43d515b492d.py.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492048227184 released on /root/.cache/huggingface/datasets/9ed4f2e133395826175a892c70611f68522c7bc61a35476e8b51a31afb76e4bf.e6f3e3f3e3875a07469d1cfd32e16e1d06b149616b11eef2d081c43d515b492d.py.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492047939344 acquired on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492047939344 released on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.lock\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492047938560 acquired on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.lock\n",
            "Downloading and preparing dataset glue/cola (download: 368.14 KiB, generated: 596.73 KiB, post-processed: Unknown size, total: 964.86 KiB) to /root/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4...\n",
            "10/23/2020 16:02:00 - INFO - filelock -   Lock 140492032718944 acquired on /root/.cache/huggingface/datasets/downloads/bb971ae26c644d1ca0a93f2edb4402d5934802431d3ccce209d74ddeef0c5815.lock\n",
            "Downloading: 100% 377k/377k [00:00<00:00, 1.12MB/s]\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492032718944 released on /root/.cache/huggingface/datasets/downloads/bb971ae26c644d1ca0a93f2edb4402d5934802431d3ccce209d74ddeef0c5815.lock\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492032255816 acquired on /root/.cache/huggingface/datasets/downloads/bb971ae26c644d1ca0a93f2edb4402d5934802431d3ccce209d74ddeef0c5815.lock\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492032255816 released on /root/.cache/huggingface/datasets/downloads/bb971ae26c644d1ca0a93f2edb4402d5934802431d3ccce209d74ddeef0c5815.lock\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492331181672 acquired on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.incomplete.lock\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492331181672 released on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.incomplete.lock\n",
            "Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4. Subsequent calls will reuse this data.\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492047938560 released on /root/.cache/huggingface/datasets/_root_.cache_huggingface_datasets_glue_cola_1.0.0_7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4.lock\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492047939344 acquired on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391.lock\n",
            "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp7l_29kac\n",
            "Downloading: 100% 433/433 [00:00<00:00, 637kB/s]\n",
            "storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json in cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391\n",
            "creating metadata file for /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492047939344 released on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391.lock\n",
            "loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json from cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391\n",
            "Model config BertConfig {\n",
            "  \"architectures\": [\n",
            "    \"BertForMaskedLM\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"finetuning_task\": \"cola\",\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"vocab_size\": 28996\n",
            "}\n",
            "\n",
            "loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json from cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391\n",
            "Model config BertConfig {\n",
            "  \"architectures\": [\n",
            "    \"BertForMaskedLM\"\n",
            "  ],\n",
            "  \"attention_probs_dropout_prob\": 0.1,\n",
            "  \"gradient_checkpointing\": false,\n",
            "  \"hidden_act\": \"gelu\",\n",
            "  \"hidden_dropout_prob\": 0.1,\n",
            "  \"hidden_size\": 768,\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 3072,\n",
            "  \"layer_norm_eps\": 1e-12,\n",
            "  \"max_position_embeddings\": 512,\n",
            "  \"model_type\": \"bert\",\n",
            "  \"num_attention_heads\": 12,\n",
            "  \"num_hidden_layers\": 12,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"type_vocab_size\": 2,\n",
            "  \"vocab_size\": 28996\n",
            "}\n",
            "\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492032257552 acquired on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
            "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp3vt33d6w\n",
            "Downloading: 100% 213k/213k [00:00<00:00, 21.4MB/s]\n",
            "storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt in cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
            "creating metadata file for /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
            "10/23/2020 16:02:01 - INFO - filelock -   Lock 140492032257552 released on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
            "10/23/2020 16:02:02 - INFO - filelock -   Lock 140492032718552 acquired on /root/.cache/torch/transformers/6ab937566bf23d0966726e249cb1c67de22c8183f407192a9ec95cedf9c27e4b.eee7e15a2d7964146577024e550315de88ab80a1ca2a91fba0a90f6190938d29.lock\n",
            "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpdx56csjr\n",
            "Downloading: 100% 436k/436k [00:00<00:00, 22.8MB/s]\n",
            "storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json in cache at /root/.cache/torch/transformers/6ab937566bf23d0966726e249cb1c67de22c8183f407192a9ec95cedf9c27e4b.eee7e15a2d7964146577024e550315de88ab80a1ca2a91fba0a90f6190938d29\n",
            "creating metadata file for /root/.cache/torch/transformers/6ab937566bf23d0966726e249cb1c67de22c8183f407192a9ec95cedf9c27e4b.eee7e15a2d7964146577024e550315de88ab80a1ca2a91fba0a90f6190938d29\n",
            "10/23/2020 16:02:02 - INFO - filelock -   Lock 140492032718552 released on /root/.cache/torch/transformers/6ab937566bf23d0966726e249cb1c67de22c8183f407192a9ec95cedf9c27e4b.eee7e15a2d7964146577024e550315de88ab80a1ca2a91fba0a90f6190938d29.lock\n",
            "loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
            "loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json from cache at /root/.cache/torch/transformers/6ab937566bf23d0966726e249cb1c67de22c8183f407192a9ec95cedf9c27e4b.eee7e15a2d7964146577024e550315de88ab80a1ca2a91fba0a90f6190938d29\n",
            "10/23/2020 16:02:02 - INFO - filelock -   Lock 140492048226568 acquired on /root/.cache/torch/transformers/d8f11f061e407be64c4d5d7867ee61d1465263e24085cfa26abf183fdc830569.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
            "https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp1e16vfi1\n",
            "Downloading: 100% 436M/436M [00:05<00:00, 86.1MB/s]\n",
            "storing https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin in cache at /root/.cache/torch/transformers/d8f11f061e407be64c4d5d7867ee61d1465263e24085cfa26abf183fdc830569.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
            "creating metadata file for /root/.cache/torch/transformers/d8f11f061e407be64c4d5d7867ee61d1465263e24085cfa26abf183fdc830569.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
            "10/23/2020 16:02:07 - INFO - filelock -   Lock 140492048226568 released on /root/.cache/torch/transformers/d8f11f061e407be64c4d5d7867ee61d1465263e24085cfa26abf183fdc830569.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
            "loading weights file https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin from cache at /root/.cache/torch/transformers/d8f11f061e407be64c4d5d7867ee61d1465263e24085cfa26abf183fdc830569.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
            "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
            "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
            "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
            "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
            "100% 9/9 [00:00<00:00, 21.23ba/s]\n",
            "100% 2/2 [00:00<00:00, 34.93ba/s]\n",
            "100% 2/2 [00:00<00:00, 35.15ba/s]\n",
            "10/23/2020 16:02:11 - INFO - __main__ -   Sample 1824 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 1824, 'input_ids': [101, 146, 8646, 1115, 1139, 1401, 117, 1119, 1108, 3600, 1112, 1126, 19976, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'label': 0, 'sentence': 'I acknowledged that my father, he was tight as an owl.', 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
            "10/23/2020 16:02:11 - INFO - __main__ -   Sample 409 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 409, 'input_ids': [101, 1370, 1140, 1106, 1202, 1115, 1156, 1129, 170, 6223, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'label': 1, 'sentence': 'For him to do that would be a mistake.', 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
            "10/23/2020 16:02:11 - INFO - __main__ -   Sample 4506 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'idx': 4506, 'input_ids': [101, 2090, 6407, 170, 1461, 117, 1133, 2499, 1309, 1225, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'label': 1, 'sentence': 'Mary sang a song, but Lee never did.', 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
            "10/23/2020 16:02:11 - INFO - filelock -   Lock 140492331418568 acquired on /root/.cache/huggingface/datasets/b16d3a04bf2cad1346896852bf120ba846ea1bebb1cd60255bb3a1a2bbcc3a67.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py.lock\n",
            "Downloading: 4.39kB [00:00, 4.97MB/s]       \n",
            "10/23/2020 16:02:11 - INFO - filelock -   Lock 140492331418568 released on /root/.cache/huggingface/datasets/b16d3a04bf2cad1346896852bf120ba846ea1bebb1cd60255bb3a1a2bbcc3a67.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py.lock\n",
            "10/23/2020 16:02:11 - INFO - filelock -   Lock 140492184844216 acquired on /root/.cache/huggingface/datasets/b16d3a04bf2cad1346896852bf120ba846ea1bebb1cd60255bb3a1a2bbcc3a67.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py.lock\n",
            "10/23/2020 16:02:11 - INFO - filelock -   Lock 140492184844216 released on /root/.cache/huggingface/datasets/b16d3a04bf2cad1346896852bf120ba846ea1bebb1cd60255bb3a1a2bbcc3a67.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py.lock\n",
            "The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence.\n",
            "The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence.\n",
            "10/23/2020 16:02:25 - INFO - azureml.core.run -   Could not load the run context. Logging offline\n",
            "***** Running training *****\n",
            "  Num examples = 8551\n",
            "  Num Epochs = 3\n",
            "  Instantaneous batch size per device = 32\n",
            "  Total train batch size (w. parallel, distributed & accumulation) = 32\n",
            "  Gradient Accumulation steps = 1\n",
            "  Total optimization steps = 804\n",
            "  0% 4/804 [00:02<09:36,  1.39it/s]***** Running Evaluation *****\n",
            "  Num examples = 1043\n",
            "  Batch size = 8\n",
            "\n",
            "  0% 0/131 [00:00<?, ?it/s]\u001b[A\n",
            "  2% 3/131 [00:00<00:04, 27.98it/s]\u001b[A\n",
            "  4% 5/131 [00:00<00:05, 23.47it/s]\u001b[A\n",
            "  5% 7/131 [00:00<00:05, 21.17it/s]\u001b[A\n",
            "  7% 9/131 [00:00<00:06, 20.12it/s]\u001b[A\n",
            "  8% 11/131 [00:00<00:06, 19.27it/s]\u001b[A\n",
            " 10% 13/131 [00:00<00:06, 18.63it/s]\u001b[A\n",
            " 11% 15/131 [00:00<00:06, 18.16it/s]\u001b[A\n",
            " 13% 17/131 [00:00<00:06, 17.68it/s]\u001b[A\n",
            " 15% 19/131 [00:01<00:06, 17.73it/s]\u001b[A\n",
            " 16% 21/131 [00:01<00:06, 17.54it/s]\u001b[A\n",
            " 18% 23/131 [00:01<00:06, 17.37it/s]\u001b[A\n",
            " 19% 25/131 [00:01<00:06, 17.52it/s]\u001b[A\n",
            " 21% 27/131 [00:01<00:05, 17.61it/s]\u001b[A\n",
            " 22% 29/131 [00:01<00:05, 17.63it/s]\u001b[A\n",
            " 24% 31/131 [00:01<00:05, 17.46it/s]\u001b[A\n",
            " 25% 33/131 [00:01<00:05, 17.33it/s]\u001b[A\n",
            " 27% 35/131 [00:01<00:05, 17.20it/s]\u001b[A\n",
            " 28% 37/131 [00:02<00:05, 17.32it/s]\u001b[A\n",
            " 30% 39/131 [00:02<00:05, 17.27it/s]\u001b[A\n",
            " 31% 41/131 [00:02<00:05, 17.37it/s]\u001b[A\n",
            " 33% 43/131 [00:02<00:05, 17.46it/s]\u001b[A\n",
            " 34% 45/131 [00:02<00:04, 17.38it/s]\u001b[A\n",
            " 36% 47/131 [00:02<00:04, 17.39it/s]\u001b[A\n",
            " 37% 49/131 [00:02<00:04, 17.20it/s]\u001b[A\n",
            " 39% 51/131 [00:02<00:04, 17.19it/s]\u001b[A\n",
            " 40% 53/131 [00:02<00:04, 17.25it/s]\u001b[A\n",
            " 42% 55/131 [00:03<00:04, 17.15it/s]\u001b[A\n",
            " 44% 57/131 [00:03<00:04, 17.11it/s]\u001b[A\n",
            " 45% 59/131 [00:03<00:04, 17.39it/s]\u001b[A\n",
            " 47% 61/131 [00:03<00:04, 17.28it/s]\u001b[A\n",
            " 48% 63/131 [00:03<00:03, 17.36it/s]\u001b[A\n",
            " 50% 65/131 [00:03<00:03, 17.32it/s]\u001b[A\n",
            " 51% 67/131 [00:03<00:03, 17.34it/s]\u001b[A\n",
            " 53% 69/131 [00:03<00:03, 17.24it/s]\u001b[A\n",
            " 54% 71/131 [00:04<00:03, 17.12it/s]\u001b[A\n",
            " 56% 73/131 [00:04<00:03, 17.27it/s]\u001b[A\n",
            " 57% 75/131 [00:04<00:03, 17.27it/s]\u001b[A\n",
            " 59% 77/131 [00:04<00:03, 17.29it/s]\u001b[A\n",
            " 60% 79/131 [00:04<00:03, 17.25it/s]\u001b[A\n",
            " 62% 81/131 [00:04<00:02, 17.23it/s]\u001b[A\n",
            " 63% 83/131 [00:04<00:02, 17.27it/s]\u001b[A\n",
            " 65% 85/131 [00:04<00:02, 17.23it/s]\u001b[A\n",
            " 66% 87/131 [00:04<00:02, 17.17it/s]\u001b[A\n",
            " 68% 89/131 [00:05<00:02, 17.03it/s]\u001b[A\n",
            " 69% 91/131 [00:05<00:02, 17.15it/s]\u001b[A\n",
            " 71% 93/131 [00:05<00:02, 16.94it/s]\u001b[A\n",
            " 73% 95/131 [00:05<00:02, 17.40it/s]\u001b[A\n",
            " 74% 97/131 [00:05<00:01, 17.25it/s]\u001b[A\n",
            " 76% 99/131 [00:05<00:01, 17.37it/s]\u001b[A\n",
            " 77% 101/131 [00:05<00:01, 17.32it/s]\u001b[A\n",
            " 79% 103/131 [00:05<00:01, 17.24it/s]\u001b[A\n",
            " 80% 105/131 [00:06<00:01, 17.22it/s]\u001b[A\n",
            " 82% 107/131 [00:06<00:01, 17.24it/s]\u001b[A\n",
            " 83% 109/131 [00:06<00:01, 17.24it/s]\u001b[A\n",
            " 85% 111/131 [00:06<00:01, 17.11it/s]\u001b[A\n",
            " 86% 113/131 [00:06<00:01, 17.28it/s]\u001b[A\n",
            " 88% 115/131 [00:06<00:00, 17.27it/s]\u001b[A\n",
            " 89% 117/131 [00:06<00:00, 17.20it/s]\u001b[A\n",
            " 91% 119/131 [00:06<00:00, 17.06it/s]\u001b[A\n",
            " 92% 121/131 [00:06<00:00, 17.08it/s]\u001b[A\n",
            " 94% 123/131 [00:07<00:00, 17.07it/s]\u001b[A\n",
            " 95% 125/131 [00:07<00:00, 17.27it/s]\u001b[A\n",
            " 97% 127/131 [00:07<00:00, 17.13it/s]\u001b[A\n",
            " 98% 129/131 [00:07<00:00, 17.16it/s]\u001b[A\n",
            "100% 131/131 [00:07<00:00, 17.17it/s]\u001b[A10/23/2020 16:02:35 - INFO - filelock -   Lock 140492031355032 acquired on /root/.cache/huggingface/metrics/glue/cola/default_experiment-1-0.arrow.lock\n",
            "10/23/2020 16:02:35 - INFO - filelock -   Lock 140492031355032 released on /root/.cache/huggingface/metrics/glue/cola/default_experiment-1-0.arrow.lock\n",
            "10/23/2020 16:02:35 - INFO - filelock -   Lock 140492030188848 acquired on /root/.cache/huggingface/metrics/glue/cola/default_experiment-1-0.arrow.lock\n",
            "10/23/2020 16:02:35 - INFO - /usr/local/lib/python3.6/dist-packages/datasets/metric.py -   Removing /root/.cache/huggingface/metrics/glue/cola/default_experiment-1-0.arrow\n",
            "10/23/2020 16:02:35 - INFO - filelock -   Lock 140492030188848 released on /root/.cache/huggingface/metrics/glue/cola/default_experiment-1-0.arrow.lock\n",
            "Traceback (most recent call last):\n",
            "  File \"run_glue.py\", line 417, in <module>\n",
            "    main()\n",
            "  File \"run_glue.py\", line 352, in main\n",
            "    model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer.py\", line 792, in train\n",
            "    self._maybe_log_save_evaluate(tr_loss, model, trial, epoch)\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer.py\", line 853, in _maybe_log_save_evaluate\n",
            "    metrics = self.evaluate()\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer.py\", line 1291, in evaluate\n",
            "    self.log(output.metrics)\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer.py\", line 1044, in log\n",
            "    self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs)\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer_callback.py\", line 366, in on_log\n",
            "    return self.call_event(\"on_log\", args, state, control, logs=logs)\n",
            "  File \"/usr/local/lib/python3.6/dist-packages/transformers/trainer_callback.py\", line 382, in call_event\n",
            "    **kwargs,\n",
            "TypeError: on_log() got multiple values for argument 'logs'\n",
            "\n",
            "  0% 4/804 [00:10<34:45,  2.61s/it]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}