-
-
Save ceshine/752c77742973a013320a9f20384528a1 to your computer and use it in GitHub Desktop.
quest-public.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "quest-public.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyNHvdkFfvCQLr8F+fTSgxtj", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "TPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/ceshine/752c77742973a013320a9f20384528a1/quest-public.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7BI0qPAAyWFS", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Prepare environment" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "39MSBo8QzdIE", | |
"colab_type": "code", | |
"outputId": "a51f0a08-07bf-472a-e6a4-415290e3d1a5", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Colab magic: switch this runtime to TensorFlow 2.x (run it before TensorFlow is imported).\n", | |
"%tensorflow_version 2.x" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"TensorFlow 2.x selected.\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_R5grSggQIBk", | |
"colab_type": "code", | |
"outputId": "52024d31-29bf-470b-f70c-d081986f790d", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Import TensorFlow and display its version to confirm the runtime selection\n", | |
"# (the recorded output of this cell is '2.1.0').\n", | |
"import tensorflow as tf\n", | |
"tf.__version__" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'2.1.0'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "r6FwWWP9zmKc", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Install/upgrade the notebook's Python dependencies (only transformers is pinned).\n", | |
"# After this cell has run once, restart the runtime and skip this cell on later runs.\n", | |
"!pip install -U wandb fire transformers==2.3.0 python-telegram-bot kaggle ipykernel" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "m5DJi5zI-nJb", | |
"colab_type": "code", | |
"colab": {}, | |
"cellView": "both" | |
}, | |
"source": [ | |
"#@title Secrets (Leave TG constants empty if you don't want Telegram notification)\n", | |
"# Colab form fields. Later cells use these values to write ~/.kaggle/kaggle.json and\n", | |
"# ./env.sh, so fill them in before running those cells and do not save real secrets\n", | |
"# into a shared copy of the notebook.\n", | |
"TG_TOKEN = '' #@param {type: \"string\"}\n", | |
"TG_CHAT_ID = '' #@param {type: \"string\"}\n", | |
"KAGGLE_API_KEY = 'api key used for uploading the dataset' #@param {type: \"string\"}" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wDjoS9NEzqmK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Authenticate this Colab session with a Google account so that the gsutil\n", | |
"# commands in the following cells can read from Google Cloud Storage.\n", | |
"from google.colab import auth\n", | |
"auth.authenticate_user()" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Ma7tIEOpzvZ1", | |
"colab_type": "code", | |
"outputId": "29a6e280-b1b5-4c99-bcad-7eb1abbe8756", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 86 | |
} | |
}, | |
"source": [ | |
"# Download the prebuilt wheels from the GCS bucket into the working directory\n", | |
"# (quest and tf_helper_bot, per the recorded output of this cell).\n", | |
"!gsutil cp gs://ceshine-colab-tmp-2/quest/*.whl ." | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Copying gs://ceshine-colab-tmp-2/quest/quest-0.0.1-py3-none-any.whl...\n", | |
"/ [0 files][ 0.0 B/ 18.0 KiB] \r/ [1 files][ 18.0 KiB/ 18.0 KiB] \rCopying gs://ceshine-colab-tmp-2/quest/tf_helper_bot-0.0.1-py3-none-any.whl...\n", | |
"/ [1 files][ 18.0 KiB/ 31.2 KiB] \r/ [2 files][ 31.2 KiB/ 31.2 KiB] \r\n", | |
"Operation completed over 2 objects/31.2 KiB. \n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MOxYSAxD2csb", | |
"colab_type": "code", | |
"outputId": "023784ea-4f39-4d8a-f73c-2ccf1f9df387", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 121 | |
} | |
}, | |
"source": [ | |
"# Install the two downloaded wheels; --force-reinstall replaces any copy that is\n", | |
"# already installed in this runtime, so re-running picks up freshly downloaded builds.\n", | |
"!pip install --force-reinstall tf_helper_bot-0.0.1-py3-none-any.whl\n", | |
"!pip install --force-reinstall quest-0.0.1-py3-none-any.whl" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Processing ./tf_helper_bot-0.0.1-py3-none-any.whl\n", | |
"Installing collected packages: tf-helper-bot\n", | |
"Successfully installed tf-helper-bot-0.0.1\n", | |
"Processing ./quest-0.0.1-py3-none-any.whl\n", | |
"Installing collected packages: quest\n", | |
"Successfully installed quest-0.0.1\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JhZ9gEFx9l1j", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Write the Kaggle API credentials from Python instead of interpolating the key into\n", | |
"# an `!echo` shell line: json.dump escapes the key correctly, and the key never passes\n", | |
"# through shell quoting or a command line.\n", | |
"import json\n", | |
"import os\n", | |
"\n", | |
"kaggle_dir = os.path.expanduser('~/.kaggle')\n", | |
"os.makedirs(kaggle_dir, exist_ok=True)\n", | |
"kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')\n", | |
"with open(kaggle_json_path, 'w') as fout:\n", | |
"    json.dump({'username': 'ceshine', 'key': KAGGLE_API_KEY}, fout)\n", | |
"# Same permissions as the original `chmod 600`: readable and writable by the owner only.\n", | |
"os.chmod(kaggle_json_path, 0o600)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "x7IyXDDq2jWd", | |
"colab_type": "code", | |
"outputId": "5f6f0c3d-7a95-4857-e023-3d5aee9ab8a7", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 243 | |
} | |
}, | |
"source": [ | |
"# Reinstall the kaggle package on its own; --no-deps leaves its dependencies untouched.\n", | |
"# NOTE(review): the reason for the reinstall is not recorded here, and the package is\n", | |
"# unpinned, so the version may differ from the 1.5.6 shown in the recorded output.\n", | |
"!pip install -U --force-reinstall --no-deps kaggle" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting kaggle\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/62/ab/bb20f9b9e24f9a6250f95a432f8d9a7d745f8d24039d7a5a6eaadb7783ba/kaggle-1.5.6.tar.gz (58kB)\n", | |
"\r\u001b[K |█████▋ | 10kB 18.4MB/s eta 0:00:01\r\u001b[K |███████████▎ | 20kB 2.2MB/s eta 0:00:01\r\u001b[K |█████████████████ | 30kB 3.1MB/s eta 0:00:01\r\u001b[K |██████████████████████▌ | 40kB 2.1MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 51kB 2.5MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 61kB 2.4MB/s \n", | |
"\u001b[?25hBuilding wheels for collected packages: kaggle\n", | |
" Building wheel for kaggle (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for kaggle: filename=kaggle-1.5.6-cp36-none-any.whl size=72859 sha256=6f91a26c57e5844d3334549065a3f2a05392b2894c90e41132fe88097c461c1b\n", | |
" Stored in directory: /root/.cache/pip/wheels/57/4e/e8/bb28d035162fb8f17f8ca5d42c3230e284c6aa565b42b72674\n", | |
"Successfully built kaggle\n", | |
"Installing collected packages: kaggle\n", | |
" Found existing installation: kaggle 1.5.6\n", | |
" Uninstalling kaggle-1.5.6:\n", | |
" Successfully uninstalled kaggle-1.5.6\n", | |
"Successfully installed kaggle-1.5.6\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "U-6lZ5Fg9xiK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Create the staging directory for the Kaggle dataset files (the metadata file,\n", | |
"# the tokenizer and the per-fold training outputs are all placed under it).\n", | |
"!mkdir -p cache/kaggleds\n", | |
"# Disabled step, kept for reference. It used to be written as `!cd cache/kaggleds # && ...`,\n", | |
"# which did nothing: `!cd` runs in a throwaway subshell and the rest was commented out.\n", | |
"# !cd cache/kaggleds && kaggle datasets metadata quest-models" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_opEfdnN2_Oq", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Write the Kaggle dataset metadata with json.dump rather than `!echo '{...}'`.\n", | |
"# IPython expands {braces} in `!` commands as Python expressions, so a JSON literal on\n", | |
"# a shell line depends on that expansion leaving the text alone; writing the file from\n", | |
"# Python avoids that and always produces valid JSON.\n", | |
"# Requires the cache/kaggleds directory to exist already.\n", | |
"import json\n", | |
"\n", | |
"dataset_metadata = {\n", | |
"    'subtitle': '',\n", | |
"    'description': '',\n", | |
"    'title': 'Quest Models',\n", | |
"    'keywords': [],\n", | |
"    'id': 'ceshine/quest-models-public',\n", | |
"}\n", | |
"with open('cache/kaggleds/dataset-metadata.json', 'w') as fout:\n", | |
"    json.dump(dataset_metadata, fout, indent=2)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "c2txSWpZ90QF", | |
"colab_type": "code", | |
"outputId": "a64967ab-2ba7-4ec0-eda2-4c2b087509e8", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 243 | |
} | |
}, | |
"source": [ | |
"# Copy the saved tokenizer directory from the GCS bucket into the staging directory\n", | |
"# (tokenizer_roberta-base, per the recorded output of this cell).\n", | |
"!gsutil cp -r gs://ceshine-colab-tmp-2/quest/tokenizer* cache/kaggleds/" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Copying gs://ceshine-colab-tmp-2/quest/tokenizer_roberta-base/added_tokens.json...\n", | |
"Copying gs://ceshine-colab-tmp-2/quest/tokenizer_roberta-base/merges.txt...\n", | |
"Copying gs://ceshine-colab-tmp-2/quest/tokenizer_roberta-base/special_tokens_map.json...\n", | |
"Copying gs://ceshine-colab-tmp-2/quest/tokenizer_roberta-base/tokenizer_config.json...\n", | |
"/ [4 files][445.8 KiB/445.8 KiB] \n", | |
"==> NOTE: You are performing a sequence of gsutil operations that may\n", | |
"run significantly faster if you instead use gsutil -m cp ... Please\n", | |
"see the -m section under \"gsutil help options\" for further information\n", | |
"about when gsutil -m can be advantageous.\n", | |
"\n", | |
"Copying gs://ceshine-colab-tmp-2/quest/tokenizer_roberta-base/vocab.json...\n", | |
"/ [5 files][ 1.3 MiB/ 1.3 MiB] \n", | |
"Operation completed over 5 objects/1.3 MiB. \n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "0Mmnav9C_WiU", | |
"colab_type": "code", | |
"outputId": "289732a7-c7a9-4a75-fb02-018c45bddf3e", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Sanity check: the staging directory should now contain dataset-metadata.json and\n", | |
"# the tokenizer directory.\n", | |
"!ls cache/kaggleds" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"dataset-metadata.json tokenizer_roberta-base\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "p6zDyKKQFMl8", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Write env.sh, which the training command sources to export the Telegram settings\n", | |
"# and TF_CPP_MIN_LOG_LEVEL=3 (quietens TensorFlow's native-code logging).\n", | |
"# The values are shell-quoted and the file is written from Python, so special\n", | |
"# characters in a token can neither be interpreted by the shell nor break the file.\n", | |
"import shlex\n", | |
"\n", | |
"content = 'export TG_TOKEN=%s TG_CHAT_ID=%s TF_CPP_MIN_LOG_LEVEL=3' % (\n", | |
"    shlex.quote(str(TG_TOKEN)), shlex.quote(str(TG_CHAT_ID)))\n", | |
"with open('env.sh', 'w') as fout:\n", | |
"    fout.write(content + '\\n')" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "KZJW7utQypyq", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Train models" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "emqTCElpHCMF", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"!rm -rf cache/kaggleds/*fold*" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "V-Qa9agMpJdS", | |
"colab_type": "code", | |
"outputId": "7cbf7f6e-c531-401a-89b6-ad4f81f56f36", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
} | |
}, | |
"source": [ | |
"# Train the roberta-base model fold by fold with quest.train_folds, after sourcing\n", | |
"# env.sh for the Telegram/logging environment variables.\n", | |
"# What the recorded log of this cell shows for each fold:\n", | |
"#   - 8 TPU replicas and a reported batch size of 16 (apparently --batch-size 2 per replica);\n", | |
"#   - a first phase of 912 steps (3 x 304 steps per epoch, matching --freeze 3) in which\n", | |
"#     the RoBERTa encoder weights are non-trainable;\n", | |
"#   - then 1500 steps with all weights trainable, logged every 150 steps and evaluated\n", | |
"#     (loss and Spearman) every 300 steps.\n", | |
"# NOTE(review): flag spelling is mixed (--model-name vs --max_lr, and a single-dash\n", | |
"# -n-folds). The recorded run accepted it; confirm against the CLI before normalising.\n", | |
"!source env.sh && \\\n", | |
" python -m quest.train_folds --model-name roberta-base --batch-size 2 \\\n", | |
" --train-path-pattern \"gs://ceshine-colab-tmp-2/quest/train-%d-*.tfrec\" \\\n", | |
" --valid-path-pattern \"gs://ceshine-colab-tmp-2/quest/valid-%d-*.tfrec\" \\\n", | |
" --max_lr 1e-4 -n-folds 5 --freeze 3 \\\n", | |
" --steps 1500 --checkpoint-interval 300 --log-interval 150 \\\n", | |
" --output_path_pattern \"cache/kaggleds/roberta-base-fold-%d\"" | |
], | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"====================\n", | |
"Training Fold 1\n", | |
"====================\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0\n", | |
"REPLICAS: 8\n", | |
"cnt: 4863 batch size: 16\n", | |
"cnt: 1216 batch size: 32\n", | |
"Model: \"dual_roberta_model\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_38 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_1 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_2 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 3,278,046\n", | |
"Non-trainable params: 124,645,632\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Train for 912 steps\n", | |
"912/912 [==============================] - 118s 129ms/step - loss: 0.4473\n", | |
"Model: \"dual_roberta_model\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_38 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_1 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_2 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 127,923,678\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Steps per epoch: 304 | 38\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", | |
" \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"[INFO][02/12/2020 12:30:08] Step 150 | loss 0.3948 | lr 1.00e-04 | 0.933s per step\n", | |
"[INFO][02/12/2020 12:30:42] Step 300 | loss 0.3839 | lr 9.70e-05 | 0.228s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:20<00:00, 6.68it/s]\n", | |
"[INFO][02/12/2020 12:31:03] Metrics at step 300:\n", | |
"[INFO][02/12/2020 12:31:03] loss: 0.3749\n", | |
"[INFO][02/12/2020 12:31:03] spearman: 38.21\n", | |
"[INFO][02/12/2020 12:31:40] Step 450 | loss 0.3719 | lr 8.83e-05 | 0.386s per step\n", | |
"[INFO][02/12/2020 12:32:15] Step 600 | loss 0.3676 | lr 7.50e-05 | 0.230s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.73it/s]\n", | |
"[INFO][02/12/2020 12:32:21] Metrics at step 600:\n", | |
"[INFO][02/12/2020 12:32:21] loss: 0.3716\n", | |
"[INFO][02/12/2020 12:32:21] spearman: 39.09\n", | |
"[INFO][02/12/2020 12:32:58] Step 750 | loss 0.3611 | lr 5.87e-05 | 0.291s per step\n", | |
"[INFO][02/12/2020 12:33:33] Step 900 | loss 0.3588 | lr 4.13e-05 | 0.231s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:05<00:00, 6.55it/s]\n", | |
"[INFO][02/12/2020 12:33:39] Metrics at step 900:\n", | |
"[INFO][02/12/2020 12:33:39] loss: 0.3683\n", | |
"[INFO][02/12/2020 12:33:39] spearman: 40.49\n", | |
"[INFO][02/12/2020 12:34:16] Step 1050 | loss 0.3496 | lr 2.50e-05 | 0.290s per step\n", | |
"[INFO][02/12/2020 12:34:51] Step 1200 | loss 0.3484 | lr 1.17e-05 | 0.230s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.75it/s]\n", | |
"[INFO][02/12/2020 12:34:57] Metrics at step 1200:\n", | |
"[INFO][02/12/2020 12:34:57] loss: 0.3681\n", | |
"[INFO][02/12/2020 12:34:57] spearman: 40.60\n", | |
"[INFO][02/12/2020 12:35:35] Step 1350 | loss 0.3424 | lr 3.03e-06 | 0.291s per step\n", | |
"[INFO][02/12/2020 12:36:09] Step 1500 | loss 0.3402 | lr 1.00e-08 | 0.229s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.58it/s]\n", | |
"[INFO][02/12/2020 12:36:15] Metrics at step 1500:\n", | |
"[INFO][02/12/2020 12:36:15] loss: 0.3690\n", | |
"[INFO][02/12/2020 12:36:15] spearman: 40.55\n", | |
"[INFO][02/12/2020 12:36:15] Training finished. Best step(s):\n", | |
"[INFO][02/12/2020 12:36:15] loss: 0.3681 @ step 1200\n", | |
"[INFO][02/12/2020 12:36:15] spearman: 40.60 @ step 1200\n", | |
"====================\n", | |
"Training Fold 2\n", | |
"====================\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"REPLICAS: 8\n", | |
"cnt: 4863 batch size: 16\n", | |
"cnt: 1216 batch size: 32\n", | |
"Model: \"dual_roberta_model_1\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_77 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_3 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_4 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_5 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 3,278,046\n", | |
"Non-trainable params: 124,645,632\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Train for 912 steps\n", | |
"912/912 [==============================] - 123s 134ms/step - loss: 0.4428\n", | |
"Model: \"dual_roberta_model_1\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_77 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_3 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_4 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_5 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 127,923,678\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Steps per epoch: 304 | 38\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", | |
" \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"[INFO][02/12/2020 12:41:36] Step 150 | loss 0.3958 | lr 1.00e-04 | 0.933s per step\n", | |
"[INFO][02/12/2020 12:42:11] Step 300 | loss 0.3848 | lr 9.70e-05 | 0.228s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:20<00:00, 6.54it/s]\n", | |
"[INFO][02/12/2020 12:42:31] Metrics at step 300:\n", | |
"[INFO][02/12/2020 12:42:31] loss: 0.3772\n", | |
"[INFO][02/12/2020 12:42:31] spearman: 36.65\n", | |
"[INFO][02/12/2020 12:43:09] Step 450 | loss 0.3718 | lr 8.83e-05 | 0.386s per step\n", | |
"[INFO][02/12/2020 12:43:44] Step 600 | loss 0.3681 | lr 7.50e-05 | 0.234s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.80it/s]\n", | |
"[INFO][02/12/2020 12:43:50] Metrics at step 600:\n", | |
"[INFO][02/12/2020 12:43:50] loss: 0.3712\n", | |
"[INFO][02/12/2020 12:43:50] spearman: 38.33\n", | |
"[INFO][02/12/2020 12:44:28] Step 750 | loss 0.3627 | lr 5.87e-05 | 0.294s per step\n", | |
"[INFO][02/12/2020 12:45:02] Step 900 | loss 0.3574 | lr 4.13e-05 | 0.232s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.56it/s]\n", | |
"[INFO][02/12/2020 12:45:09] Metrics at step 900:\n", | |
"[INFO][02/12/2020 12:45:09] loss: 0.3697\n", | |
"[INFO][02/12/2020 12:45:09] spearman: 38.91\n", | |
"[INFO][02/12/2020 12:45:46] Step 1050 | loss 0.3528 | lr 2.50e-05 | 0.293s per step\n", | |
"[INFO][02/12/2020 12:46:21] Step 1200 | loss 0.3459 | lr 1.17e-05 | 0.233s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.57it/s]\n", | |
"[INFO][02/12/2020 12:46:27] Metrics at step 1200:\n", | |
"[INFO][02/12/2020 12:46:27] loss: 0.3664\n", | |
"[INFO][02/12/2020 12:46:27] spearman: 39.43\n", | |
"[INFO][02/12/2020 12:47:05] Step 1350 | loss 0.3429 | lr 3.03e-06 | 0.290s per step\n", | |
"[INFO][02/12/2020 12:47:40] Step 1500 | loss 0.3392 | lr 1.00e-08 | 0.231s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.66it/s]\n", | |
"[INFO][02/12/2020 12:47:46] Metrics at step 1500:\n", | |
"[INFO][02/12/2020 12:47:46] loss: 0.3671\n", | |
"[INFO][02/12/2020 12:47:46] spearman: 39.63\n", | |
"[INFO][02/12/2020 12:47:49] Training finished. Best step(s):\n", | |
"[INFO][02/12/2020 12:47:49] loss: 0.3664 @ step 1200\n", | |
"[INFO][02/12/2020 12:47:49] spearman: 39.63 @ step 1500\n", | |
"====================\n", | |
"Training Fold 3\n", | |
"====================\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"REPLICAS: 8\n", | |
"cnt: 4863 batch size: 16\n", | |
"cnt: 1216 batch size: 32\n", | |
"Model: \"dual_roberta_model_2\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_116 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_6 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_7 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_8 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 3,278,046\n", | |
"Non-trainable params: 124,645,632\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Train for 912 steps\n", | |
"912/912 [==============================] - 121s 133ms/step - loss: 0.4483\n", | |
"Model: \"dual_roberta_model_2\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_116 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_6 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_7 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_8 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 127,923,678\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Steps per epoch: 304 | 38\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", | |
" \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"[INFO][02/12/2020 12:53:12] Step 150 | loss 0.3958 | lr 1.00e-04 | 0.937s per step\n", | |
"[INFO][02/12/2020 12:53:46] Step 300 | loss 0.3854 | lr 9.70e-05 | 0.229s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:21<00:00, 6.47it/s]\n", | |
"[INFO][02/12/2020 12:54:07] Metrics at step 300:\n", | |
"[INFO][02/12/2020 12:54:07] loss: 0.3714\n", | |
"[INFO][02/12/2020 12:54:07] spearman: 37.81\n", | |
"[INFO][02/12/2020 12:54:45] Step 450 | loss 0.3736 | lr 8.83e-05 | 0.394s per step\n", | |
"[INFO][02/12/2020 12:55:19] Step 600 | loss 0.3682 | lr 7.50e-05 | 0.229s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.46it/s]\n", | |
"[INFO][02/12/2020 12:55:26] Metrics at step 600:\n", | |
"[INFO][02/12/2020 12:55:26] loss: 0.3689\n", | |
"[INFO][02/12/2020 12:55:26] spearman: 39.83\n", | |
"[INFO][02/12/2020 12:56:03] Step 750 | loss 0.3611 | lr 5.87e-05 | 0.294s per step\n", | |
"[INFO][02/12/2020 12:56:38] Step 900 | loss 0.3580 | lr 4.13e-05 | 0.232s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.39it/s]\n", | |
"[INFO][02/12/2020 12:56:45] Metrics at step 900:\n", | |
"[INFO][02/12/2020 12:56:45] loss: 0.3641\n", | |
"[INFO][02/12/2020 12:56:45] spearman: 40.62\n", | |
"[INFO][02/12/2020 12:57:23] Step 1050 | loss 0.3512 | lr 2.50e-05 | 0.298s per step\n", | |
"[INFO][02/12/2020 12:57:58] Step 1200 | loss 0.3466 | lr 1.17e-05 | 0.233s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.49it/s]\n", | |
"[INFO][02/12/2020 12:58:04] Metrics at step 1200:\n", | |
"[INFO][02/12/2020 12:58:04] loss: 0.3640\n", | |
"[INFO][02/12/2020 12:58:04] spearman: 41.06\n", | |
"[INFO][02/12/2020 12:58:42] Step 1350 | loss 0.3410 | lr 3.03e-06 | 0.294s per step\n", | |
"[INFO][02/12/2020 12:59:17] Step 1500 | loss 0.3386 | lr 1.00e-08 | 0.236s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.60it/s]\n", | |
"[INFO][02/12/2020 12:59:23] Metrics at step 1500:\n", | |
"[INFO][02/12/2020 12:59:23] loss: 0.3650\n", | |
"[INFO][02/12/2020 12:59:23] spearman: 40.91\n", | |
"[INFO][02/12/2020 12:59:23] Training finished. Best step(s):\n", | |
"[INFO][02/12/2020 12:59:23] loss: 0.3640 @ step 1200\n", | |
"[INFO][02/12/2020 12:59:23] spearman: 41.06 @ step 1200\n", | |
"====================\n", | |
"Training Fold 4\n", | |
"====================\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"REPLICAS: 8\n", | |
"cnt: 4863 batch size: 16\n", | |
"cnt: 1216 batch size: 32\n", | |
"Model: \"dual_roberta_model_3\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_155 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_9 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_10 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_11 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 3,278,046\n", | |
"Non-trainable params: 124,645,632\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Train for 912 steps\n", | |
"912/912 [==============================] - 124s 136ms/step - loss: 0.4514\n", | |
"Model: \"dual_roberta_model_3\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_155 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_9 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_10 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_11 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 127,923,678\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Steps per epoch: 304 | 38\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", | |
" \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"[INFO][02/12/2020 13:04:47] Step 150 | loss 0.3966 | lr 1.00e-04 | 0.953s per step\n", | |
"[INFO][02/12/2020 13:05:21] Step 300 | loss 0.3836 | lr 9.70e-05 | 0.231s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:20<00:00, 6.53it/s]\n", | |
"[INFO][02/12/2020 13:05:42] Metrics at step 300:\n", | |
"[INFO][02/12/2020 13:05:42] loss: 0.3709\n", | |
"[INFO][02/12/2020 13:05:42] spearman: 36.84\n", | |
"[INFO][02/12/2020 13:06:20] Step 450 | loss 0.3744 | lr 8.83e-05 | 0.390s per step\n", | |
"[INFO][02/12/2020 13:06:54] Step 600 | loss 0.3697 | lr 7.50e-05 | 0.230s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.55it/s]\n", | |
"[INFO][02/12/2020 13:07:01] Metrics at step 600:\n", | |
"[INFO][02/12/2020 13:07:01] loss: 0.3685\n", | |
"[INFO][02/12/2020 13:07:01] spearman: 38.79\n", | |
"[INFO][02/12/2020 13:07:38] Step 750 | loss 0.3640 | lr 5.87e-05 | 0.293s per step\n", | |
"[INFO][02/12/2020 13:08:14] Step 900 | loss 0.3591 | lr 4.13e-05 | 0.236s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.40it/s]\n", | |
"[INFO][02/12/2020 13:08:20] Metrics at step 900:\n", | |
"[INFO][02/12/2020 13:08:20] loss: 0.3633\n", | |
"[INFO][02/12/2020 13:08:20] spearman: 39.45\n", | |
"[INFO][02/12/2020 13:08:58] Step 1050 | loss 0.3544 | lr 2.50e-05 | 0.294s per step\n", | |
"[INFO][02/12/2020 13:09:33] Step 1200 | loss 0.3488 | lr 1.17e-05 | 0.232s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.41it/s]\n", | |
"[INFO][02/12/2020 13:09:39] Metrics at step 1200:\n", | |
"[INFO][02/12/2020 13:09:39] loss: 0.3622\n", | |
"[INFO][02/12/2020 13:09:39] spearman: 40.40\n", | |
"[INFO][02/12/2020 13:10:17] Step 1350 | loss 0.3452 | lr 3.03e-06 | 0.292s per step\n", | |
"[INFO][02/12/2020 13:10:52] Step 1500 | loss 0.3415 | lr 1.00e-08 | 0.234s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.55it/s]\n", | |
"[INFO][02/12/2020 13:10:58] Metrics at step 1500:\n", | |
"[INFO][02/12/2020 13:10:58] loss: 0.3619\n", | |
"[INFO][02/12/2020 13:10:58] spearman: 40.46\n", | |
"[INFO][02/12/2020 13:11:01] Training finished. Best step(s):\n", | |
"[INFO][02/12/2020 13:11:01] loss: 0.3619 @ step 1500\n", | |
"[INFO][02/12/2020 13:11:01] spearman: 40.46 @ step 1500\n", | |
"====================\n", | |
"Training Fold 5\n", | |
"====================\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"REPLICAS: 8\n", | |
"cnt: 4864 batch size: 16\n", | |
"cnt: 1215 batch size: 32\n", | |
"Model: \"dual_roberta_model_4\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_194 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_12 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_13 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_14 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 3,278,046\n", | |
"Non-trainable params: 124,645,632\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Train for 912 steps\n", | |
"912/912 [==============================] - 125s 138ms/step - loss: 0.4479\n", | |
"Model: \"dual_roberta_model_4\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"roberta_question (RobertaEnc multiple 124645632 \n", | |
"_________________________________________________________________\n", | |
"dropout_194 (Dropout) multiple 0 \n", | |
"_________________________________________________________________\n", | |
"q_classifier (Dense) multiple 16149 \n", | |
"_________________________________________________________________\n", | |
"a_classifier (Dense) multiple 3845 \n", | |
"_________________________________________________________________\n", | |
"j_classifier (Dense) multiple 9220 \n", | |
"_________________________________________________________________\n", | |
"se_layer_12 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_13 (SELayer) multiple 295872 \n", | |
"_________________________________________________________________\n", | |
"se_layer_14 (SELayer) multiple 2657088 \n", | |
"=================================================================\n", | |
"Total params: 127,923,678\n", | |
"Trainable params: 127,923,678\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n", | |
"Steps per epoch: 304 | 38\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", | |
" \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"WARNING:tensorflow:Gradients do not exist for variables ['roberta_question/roberta/pooler/dense/kernel:0', 'roberta_question/roberta/pooler/dense/bias:0'] when minimizing the loss.\n", | |
"[INFO][02/12/2020 13:16:25] Step 150 | loss 0.3952 | lr 1.00e-04 | 0.942s per step\n", | |
"[INFO][02/12/2020 13:17:00] Step 300 | loss 0.3841 | lr 9.70e-05 | 0.232s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:39<00:00, 5.66s/it]\n", | |
"[INFO][02/12/2020 13:17:39] Metrics at step 300:\n", | |
"[INFO][02/12/2020 13:17:39] loss: 0.3764\n", | |
"[INFO][02/12/2020 13:17:39] spearman: 35.93\n", | |
"[INFO][02/12/2020 13:18:18] Step 450 | loss 0.3748 | lr 8.83e-05 | 0.518s per step\n", | |
"[INFO][02/12/2020 13:18:53] Step 600 | loss 0.3694 | lr 7.50e-05 | 0.234s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.41it/s]\n", | |
"[INFO][02/12/2020 13:18:59] Metrics at step 600:\n", | |
"[INFO][02/12/2020 13:18:59] loss: 0.3676\n", | |
"[INFO][02/12/2020 13:18:59] spearman: 37.72\n", | |
"[INFO][02/12/2020 13:19:37] Step 750 | loss 0.3620 | lr 5.87e-05 | 0.295s per step\n", | |
"[INFO][02/12/2020 13:20:12] Step 900 | loss 0.3578 | lr 4.13e-05 | 0.237s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.66it/s]\n", | |
"[INFO][02/12/2020 13:20:19] Metrics at step 900:\n", | |
"[INFO][02/12/2020 13:20:19] loss: 0.3675\n", | |
"[INFO][02/12/2020 13:20:19] spearman: 38.94\n", | |
"[INFO][02/12/2020 13:20:57] Step 1050 | loss 0.3533 | lr 2.50e-05 | 0.294s per step\n", | |
"[INFO][02/12/2020 13:21:32] Step 1200 | loss 0.3480 | lr 1.17e-05 | 0.235s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.50it/s]\n", | |
"[INFO][02/12/2020 13:21:38] Metrics at step 1200:\n", | |
"[INFO][02/12/2020 13:21:38] loss: 0.3664\n", | |
"[INFO][02/12/2020 13:21:38] spearman: 39.21\n", | |
"[INFO][02/12/2020 13:22:16] Step 1350 | loss 0.3440 | lr 3.03e-06 | 0.293s per step\n", | |
"[INFO][02/12/2020 13:22:51] Step 1500 | loss 0.3395 | lr 1.00e-08 | 0.234s per step\n", | |
"100%|███████████████████████████████████████████████████████████████| 38/38 [00:06<00:00, 6.42it/s]\n", | |
"[INFO][02/12/2020 13:22:57] Metrics at step 1500:\n", | |
"[INFO][02/12/2020 13:22:57] loss: 0.3675\n", | |
"[INFO][02/12/2020 13:22:57] spearman: 39.32\n", | |
"[INFO][02/12/2020 13:23:00] Training finished. Best step(s):\n", | |
"[INFO][02/12/2020 13:23:00] loss: 0.3664 @ step 1200\n", | |
"[INFO][02/12/2020 13:23:00] spearman: 39.32 @ step 1500\n", | |
"Scores: -0.402150424863349 +- 0.006430421567132591\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "poKVYv5Yyt7g", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Post-processing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "n2BCT77lBMPV", | |
"colab_type": "code", | |
"outputId": "ce0fb3b9-2446-4334-bac4-2a98aad16fe4", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"import sklearn\n", | |
"sklearn.__version__" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'0.22.1'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 16 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9UKyT0e1tKSl", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"!rm -f cache/oof.jl" | |
], | |
"execution_count": 17, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "suwGiS83enOk", | |
"colab_type": "code", | |
"outputId": "4134e3c2-a429-4547-ae65-9ec8ffd01177", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 590 | |
} | |
}, | |
"source": [ | |
"!python -m quest.eval_tpu -batch-size 8 --model-pattern \"cache/kaggleds/roberta-base-fold-%d.h5\"" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"2020-02-12 13:37:08.269938: E tensorflow/stream_executor/cuda/cuda_driver.cc:351] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", | |
"INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0\n", | |
"cnt: 1216 batch size: 64\n", | |
"100%|███████████████████████████████████████████████████████████████| 19/19 [00:30<00:00, 1.32it/s]\n", | |
"Raw Spearman: 40.60\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"cnt: 1216 batch size: 64\n", | |
"100%|███████████████████████████████████████████████████████████████| 19/19 [00:29<00:00, 1.28it/s]\n", | |
"Raw Spearman: 39.63\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"cnt: 1216 batch size: 64\n", | |
"100%|███████████████████████████████████████████████████████████████| 19/19 [00:29<00:00, 1.28it/s]\n", | |
"Raw Spearman: 41.06\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"cnt: 1216 batch size: 64\n", | |
"100%|███████████████████████████████████████████████████████████████| 19/19 [00:29<00:00, 1.27it/s]\n", | |
"Raw Spearman: 40.46\n", | |
"Running on TPU ['10.7.137.26:8470']\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"WARNING:tensorflow:TPU system 10.7.137.26:8470 has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.\n", | |
"cnt: 1215 batch size: 64\n", | |
"100%|███████████████████████████████████████████████████████████████| 19/19 [00:46<00:00, 6.10s/it]\n", | |
"Raw Spearman: 39.32\n", | |
"Raw Spearman: 40.15\n", | |
"Optimized Spearman: 43.00\n", | |
"[50, 26, 5, 24, 13, 2, 21, 18, 11, 22, 30, 4, 4, 3, 4, 5, 13, 18, 36, 2, 24, 17, 14, 46, 27, 48, 53, 22, 19, 24]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OVufYclvsm0s", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"!mv cache/best_bins.jl cache/kaggleds/best_bins.jl" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "xmzyPUZFywO8", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Upload the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8T_eh5vr_ZOF", | |
"colab_type": "code", | |
"outputId": "f11166a6-9a5f-47c1-8963-4ce50ba700c3", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 399 | |
} | |
}, | |
"source": [ | |
"!cd cache/kaggleds && kaggle datasets version --dir-mode tar -m \"5fold\" -d" | |
], | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Starting upload for file roberta-base-fold-3.h5\n", | |
"100% 488M/488M [00:05<00:00, 99.5MB/s]\n", | |
"Upload successful: roberta-base-fold-3.h5 (488MB)\n", | |
"Starting upload for file best_bins.jl\n", | |
"100% 1.22k/1.22k [00:00<00:00, 1.58kB/s]\n", | |
"Upload successful: best_bins.jl (1KB)\n", | |
"Starting upload for file roberta-base-fold-0.h5\n", | |
"100% 488M/488M [00:03<00:00, 133MB/s]\n", | |
"Upload successful: roberta-base-fold-0.h5 (488MB)\n", | |
"Starting upload for file tokenizer_roberta-base.tar\n", | |
"100% 1.30M/1.30M [00:01<00:00, 1.26MB/s]\n", | |
"Upload successful: tokenizer_roberta-base.tar (1MB)\n", | |
"Starting upload for file roberta-base-fold-1.h5\n", | |
"100% 488M/488M [00:04<00:00, 117MB/s]\n", | |
"Upload successful: roberta-base-fold-1.h5 (488MB)\n", | |
"Starting upload for file roberta-base-fold-2.h5\n", | |
"100% 488M/488M [00:04<00:00, 123MB/s]\n", | |
"Upload successful: roberta-base-fold-2.h5 (488MB)\n", | |
"Starting upload for file roberta-base-fold-4.h5\n", | |
"100% 488M/488M [00:03<00:00, 134MB/s]\n", | |
"Upload successful: roberta-base-fold-4.h5 (488MB)\n", | |
"Dataset version is being created. Please check progress at https://www.kaggle.com/ceshine/quest-models-public\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_epkw2mZpgnW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment