Skip to content

Instantly share code, notes, and snippets.

@iam-abbas
Last active December 31, 2020 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iam-abbas/307ed1ac92cb288859345366fdf15e74 to your computer and use it in GitHub Desktop.
Save iam-abbas/307ed1ac92cb288859345366fdf15e74 to your computer and use it in GitHub Desktop.
GPT2 Horror.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "GPT2 Horror.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyMIl7puQAbKypz3VA2zgjT8",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"1899861f4ed64cb89a9481f5a8fa4a2e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_e14589f6b0fb48869f2a7f96154173b7",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_48bda53ec37045f085520e23242d6d00",
"IPY_MODEL_cae29f75c54640d9b474ece68f56c392"
]
}
},
"e14589f6b0fb48869f2a7f96154173b7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"48bda53ec37045f085520e23242d6d00": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_0b4b0024a90645b0853c264fe0946867",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 716,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 716,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_e8553d1e66204000aef6076ba534af7d"
}
},
"cae29f75c54640d9b474ece68f56c392": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_8052912abf614c718fbaab3a275b33b2",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 716/716 [01:20<00:00, 8.89it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_a63f5904e92143e0ab44a96b7f052622"
}
},
"0b4b0024a90645b0853c264fe0946867": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"e8553d1e66204000aef6076ba534af7d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8052912abf614c718fbaab3a275b33b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"a63f5904e92143e0ab44a96b7f052622": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/iam-abbas/307ed1ac92cb288859345366fdf15e74/gpt2-horror.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "H_5EeUiNrP5S"
},
"source": [
"## Generating Horror Stories using GPT-2"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BqFIT1YCrViW"
},
"source": [
"### Install required libraries"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ey3VCJmNdjM8",
"outputId": "a25fbc62-be25-4fd7-c414-80442d879844"
},
"source": [
"# install required libraries if they are not installed\r\n",
"!pip install torch -qq\r\n",
"!pip install transformers -qq\r\n",
"!pip install -q praw\r\n",
"!pip install tweepy -qq\r\n",
"!pip install datasets"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[K |████████████████████████████████| 1.5MB 4.0MB/s \n",
"\u001b[K |████████████████████████████████| 890kB 53.3MB/s \n",
"\u001b[K |████████████████████████████████| 2.9MB 47.8MB/s \n",
"\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[K |████████████████████████████████| 153kB 5.5MB/s \n",
"\u001b[K |████████████████████████████████| 204kB 32.7MB/s \n",
"\u001b[?25h"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jpC6i6ITrZLj"
},
"source": [
"### Get the files needed for fine-tuning"
]
},
{
"cell_type": "code",
"metadata": {
"id": "QGFKGMI7dz1z"
},
"source": [
"# HuggingFace scripts for fine-tuning models and language generation\r\n",
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/text-generation/run_generation.py -q"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zNSwZr1aei7t"
},
"source": [
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/language-modeling/run_clm.py -q"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "CQ6BDeqzreJw"
},
"source": [
"### Importing all the required libraries"
]
},
{
"cell_type": "code",
"metadata": {
"id": "UBN6CCY3eFbT"
},
"source": [
"import praw\r\n",
"import json\r\n",
"import re\r\n",
"from tqdm import tqdm_notebook as tqdm\r\n",
"import random\r\n",
"import torch"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "cHia3CgHriH_"
},
"source": [
"### Loading the Reddit Developer API credentials that are stored in a JSON file"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Yjk8EiFOd1aw"
},
"source": [
"with open('credentials.json') as file:\r\n",
" creds = json.loads(file.read())"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "40hhlJoQd5Sb"
},
"source": [
"scraper = praw.Reddit(client_id=creds['client_id'], client_secret=creds['client_secret'], user_agent=creds['user_agent'])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "DqNxWLHBfIDo"
},
"source": [
"sub = \"nosleep\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "SsuHGp_7rogA"
},
"source": [
"### Fetching stories from `r/nosleep` into a list"
]
},
{
"cell_type": "code",
"metadata": {
"id": "9SYRaI-4fLXe"
},
"source": [
"stories = list(scraper.subreddit(sub).hot(limit=10000))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "IK9OR4zIrwSP"
},
"source": [
"### Doing some cleaning on text using regex"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 117,
"referenced_widgets": [
"1899861f4ed64cb89a9481f5a8fa4a2e",
"e14589f6b0fb48869f2a7f96154173b7",
"48bda53ec37045f085520e23242d6d00",
"cae29f75c54640d9b474ece68f56c392",
"0b4b0024a90645b0853c264fe0946867",
"e8553d1e66204000aef6076ba534af7d",
"8052912abf614c718fbaab3a275b33b2",
"a63f5904e92143e0ab44a96b7f052622"
]
},
"id": "r_5eRvNGfRAb",
"outputId": "a559df1d-bf7f-4c04-aacd-b658803508cd"
},
"source": [
"clean_stories = []\r\n",
"for story in tqdm(stories):\r\n",
" story = re.sub(\"\\[[^)]*\\)\", \"\", story.selftext)\r\n",
" if story:\r\n",
" clean_stories.append(story.replace(\"\\n\", \" \"))\r\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n",
" \n"
],
"name": "stderr"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1899861f4ed64cb89a9481f5a8fa4a2e",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=716.0), HTML(value='')))"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fYtRLBB1r0xI"
},
"source": [
"## Preparing the dataset for training and validation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EYMsDluFfUQ1"
},
"source": [
"# shuffle data\r\n",
"random.shuffle(clean_stories)\r\n",
"\r\n",
"# fraction of training data\r\n",
"split_train_valid = 0.9\r\n",
"\r\n",
"# split dataset\r\n",
"train_size = int(split_train_valid * len(clean_stories))\r\n",
"valid_size = len(clean_stories) - train_size\r\n",
"train_dataset, valid_dataset = torch.utils.data.random_split(clean_stories, [train_size, valid_size])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "G-3wQ5jDfi6l"
},
"source": [
"def make_dataset(dataset, epochs):\r\n",
" total_text = '<|endoftext|>'\r\n",
" tweets = [t for t in dataset]\r\n",
" for _ in range(epochs):\r\n",
" random.shuffle(tweets)\r\n",
" total_text += '<|endoftext|>'.join(tweets) + '<|endoftext|>'\r\n",
" return total_text"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "NWzbVpxKr8OK"
},
"source": [
"### Saving the training and validation data into respective files"
]
},
{
"cell_type": "code",
"metadata": {
"id": "RUZQW3Oufkye"
},
"source": [
"EPOCHS = 4\r\n",
"\r\n",
"with open('{}_train.txt'.format(sub), 'w') as f:\r\n",
" data = make_dataset(train_dataset, EPOCHS)\r\n",
" f.write(data)\r\n",
"\r\n",
"with open('{}_valid.txt'.format(sub), 'w') as f:\r\n",
" data = make_dataset(valid_dataset, 1)\r\n",
" f.write(data)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Qe9GJYP2sA-W"
},
"source": [
"## Using Transformers' Causal Language Model fine-tuning script to re-train the model on a custom dataset, saving the weights and checkpoints at output/nosleep"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "S8dpmqodihMt",
"outputId": "aa464b0e-bee6-41c4-b56a-bb71cf03439b"
},
"source": [
"!python run_clm.py \\\r\n",
" --output_dir=output/$sub \\\r\n",
" --overwrite_output_dir \\\r\n",
" --overwrite_cache \\\r\n",
" --model_type=gpt2 \\\r\n",
" --model_name_or_path=gpt2 \\\r\n",
" --do_train \\\r\n",
" --do_eval \\\r\n",
" --train_file=$sub\\_train.txt \\\r\n",
" --validation_file=$sub\\_valid.txt \\\r\n",
" --eval_steps 20 \\\r\n",
" --logging_steps 20 \\\r\n",
" --per_gpu_train_batch_size 1 \\\r\n",
" --num_train_epochs 1"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-12-31 11:28:23.993065: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n",
"12/31/2020 11:28:26 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"12/31/2020 11:28:26 - INFO - __main__ - Training/evaluation parameters TrainingArguments(output_dir='output/nosleep', overwrite_output_dir=True, do_train=True, do_eval=True, do_predict=False, model_parallel=False, evaluation_strategy=<EvaluationStrategy.NO: 'no'>, prediction_loss_only=False, per_device_train_batch_size=8, per_device_eval_batch_size=8, per_gpu_train_batch_size=1, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=5e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, warmup_steps=0, logging_dir='runs/Dec31_11-28-26_9398038d5345', logging_first_step=False, logging_steps=20, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, debug=False, dataloader_drop_last=False, eval_steps=20, dataloader_num_workers=0, past_index=-1, run_name='output/nosleep', disable_tqdm=False, remove_unused_columns=True, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, fp16_backend='auto', sharded_ddp=False)\n",
"Downloading: 2.57kB [00:00, 2.52MB/s] \n",
"Using custom data configuration default\n",
"Downloading and preparing dataset text/default-1e45ef12cc6822eb (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/text/default-1e45ef12cc6822eb/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab...\n",
"Dataset text downloaded and prepared to /root/.cache/huggingface/datasets/text/default-1e45ef12cc6822eb/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab. Subsequent calls will reuse this data.\n",
"[INFO|configuration_utils.py:431] 2020-12-31 11:28:29,191 >> loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
"[INFO|configuration_utils.py:467] 2020-12-31 11:28:29,192 >> Model config GPT2Config {\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"gradient_checkpointing\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"resid_pdrop\": 0.1,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|configuration_utils.py:431] 2020-12-31 11:28:29,993 >> loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
"[INFO|configuration_utils.py:467] 2020-12-31 11:28:29,994 >> Model config GPT2Config {\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"gradient_checkpointing\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"resid_pdrop\": 0.1,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
"[INFO|modeling_utils.py:1024] 2020-12-31 11:28:32,288 >> loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
"[INFO|modeling_utils.py:1140] 2020-12-31 11:28:37,614 >> All model checkpoint weights were used when initializing GPT2LMHeadModel.\n",
"\n",
"[INFO|modeling_utils.py:1149] 2020-12-31 11:28:37,614 >> All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n",
"[WARNING|tokenization_utils_base.py:3233] 2020-12-31 11:28:38,161 >> Token indices sequence length is longer than the specified maximum sequence length for this model (175756 > 1024). Running this sequence through the model will result in indexing errors\n",
"100% 3/3 [00:19<00:00, 6.38s/ba]\n",
"100% 1/1 [00:00<00:00, 1.57ba/s]\n",
"100% 3/3 [00:53<00:00, 17.75s/ba]\n",
"100% 1/1 [00:00<00:00, 5.50ba/s]\n",
"[INFO|trainer.py:388] 2020-12-31 11:30:02,192 >> The following columns in the training set don't have a corresponding argument in `GPT2LMHeadModel.forward` and have been ignored: .\n",
"[INFO|trainer.py:388] 2020-12-31 11:30:02,192 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2LMHeadModel.forward` and have been ignored: .\n",
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,193 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n",
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,196 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n",
"[INFO|trainer.py:703] 2020-12-31 11:30:02,196 >> ***** Running training *****\n",
"[INFO|trainer.py:704] 2020-12-31 11:30:02,196 >> Num examples = 7360\n",
"[INFO|trainer.py:705] 2020-12-31 11:30:02,196 >> Num Epochs = 1\n",
"[INFO|trainer.py:706] 2020-12-31 11:30:02,196 >> Instantaneous batch size per device = 8\n",
"[INFO|trainer.py:707] 2020-12-31 11:30:02,196 >> Total train batch size (w. parallel, distributed & accumulation) = 1\n",
"[INFO|trainer.py:708] 2020-12-31 11:30:02,196 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:709] 2020-12-31 11:30:02,196 >> Total optimization steps = 7360\n",
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,202 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n",
"{'loss': 3.512045669555664, 'learning_rate': 4.986413043478261e-05, 'epoch': 0.002717391304347826}\n",
"{'loss': 3.3283241271972654, 'learning_rate': 4.9728260869565216e-05, 'epoch': 0.005434782608695652}\n",
"{'loss': 3.364488983154297, 'learning_rate': 4.959239130434783e-05, 'epoch': 0.008152173913043478}\n",
"{'loss': 3.3092582702636717, 'learning_rate': 4.945652173913044e-05, 'epoch': 0.010869565217391304}\n",
"{'loss': 3.2144798278808593, 'learning_rate': 4.932065217391305e-05, 'epoch': 0.01358695652173913}\n",
"{'loss': 3.2673980712890627, 'learning_rate': 4.918478260869566e-05, 'epoch': 0.016304347826086956}\n",
"{'loss': 3.3383720397949217, 'learning_rate': 4.904891304347826e-05, 'epoch': 0.019021739130434784}\n",
"{'loss': 3.1150543212890627, 'learning_rate': 4.891304347826087e-05, 'epoch': 0.021739130434782608}\n",
"{'loss': 3.223776626586914, 'learning_rate': 4.8777173913043476e-05, 'epoch': 0.024456521739130436}\n",
"{'loss': 3.2124401092529298, 'learning_rate': 4.8641304347826086e-05, 'epoch': 0.02717391304347826}\n",
"{'loss': 3.115188789367676, 'learning_rate': 4.8505434782608696e-05, 'epoch': 0.029891304347826088}\n",
"{'loss': 3.172593116760254, 'learning_rate': 4.836956521739131e-05, 'epoch': 0.03260869565217391}\n",
"{'loss': 3.145791435241699, 'learning_rate': 4.823369565217392e-05, 'epoch': 0.035326086956521736}\n",
"{'loss': 3.1447931289672852, 'learning_rate': 4.809782608695653e-05, 'epoch': 0.03804347826086957}\n",
"{'loss': 3.1974361419677733, 'learning_rate': 4.796195652173913e-05, 'epoch': 0.04076086956521739}\n",
"{'loss': 3.1108156204223634, 'learning_rate': 4.782608695652174e-05, 'epoch': 0.043478260869565216}\n",
"{'loss': 3.144238090515137, 'learning_rate': 4.7690217391304345e-05, 'epoch': 0.04619565217391304}\n",
"{'loss': 3.1227893829345703, 'learning_rate': 4.7554347826086956e-05, 'epoch': 0.04891304347826087}\n",
"{'loss': 3.0857648849487305, 'learning_rate': 4.741847826086957e-05, 'epoch': 0.051630434782608696}\n",
"{'loss': 3.2329044342041016, 'learning_rate': 4.7282608695652177e-05, 'epoch': 0.05434782608695652}\n",
"{'loss': 3.1727392196655275, 'learning_rate': 4.714673913043479e-05, 'epoch': 0.057065217391304345}\n",
"{'loss': 3.2452533721923826, 'learning_rate': 4.701086956521739e-05, 'epoch': 0.059782608695652176}\n",
"{'loss': 3.2963642120361327, 'learning_rate': 4.6875e-05, 'epoch': 0.0625}\n",
"{'loss': 3.2465747833251952, 'learning_rate': 4.673913043478261e-05, 'epoch': 0.06521739130434782}\n",
"{'loss': 3.151161003112793, 'learning_rate': 4.660326086956522e-05, 'epoch': 0.06793478260869565}\n",
" 7% 500/7360 [01:15<17:13, 6.64it/s][INFO|trainer.py:1226] 2020-12-31 11:31:17,578 >> Saving model checkpoint to output/nosleep/checkpoint-500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:31:17,579 >> Configuration saved in output/nosleep/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:31:19,343 >> Model weights saved in output/nosleep/checkpoint-500/pytorch_model.bin\n",
"{'loss': 3.2976707458496093, 'learning_rate': 4.646739130434783e-05, 'epoch': 0.07065217391304347}\n",
"{'loss': 3.0835899353027343, 'learning_rate': 4.6331521739130436e-05, 'epoch': 0.07336956521739131}\n",
"{'loss': 3.1966699600219726, 'learning_rate': 4.6195652173913046e-05, 'epoch': 0.07608695652173914}\n",
"{'loss': 3.1216732025146485, 'learning_rate': 4.6059782608695657e-05, 'epoch': 0.07880434782608696}\n",
"{'loss': 3.1534637451171874, 'learning_rate': 4.592391304347826e-05, 'epoch': 0.08152173913043478}\n",
"{'loss': 3.0839128494262695, 'learning_rate': 4.578804347826087e-05, 'epoch': 0.08423913043478261}\n",
"{'loss': 3.0870521545410154, 'learning_rate': 4.565217391304348e-05, 'epoch': 0.08695652173913043}\n",
"{'loss': 3.0487434387207033, 'learning_rate': 4.551630434782609e-05, 'epoch': 0.08967391304347826}\n",
"{'loss': 3.1483850479125977, 'learning_rate': 4.53804347826087e-05, 'epoch': 0.09239130434782608}\n",
"{'loss': 3.2189189910888674, 'learning_rate': 4.5244565217391305e-05, 'epoch': 0.09510869565217392}\n",
"{'loss': 3.171872138977051, 'learning_rate': 4.5108695652173916e-05, 'epoch': 0.09782608695652174}\n",
"{'loss': 3.1267330169677736, 'learning_rate': 4.4972826086956526e-05, 'epoch': 0.10054347826086957}\n",
"{'loss': 3.0810239791870115, 'learning_rate': 4.483695652173913e-05, 'epoch': 0.10326086956521739}\n",
"{'loss': 3.056863212585449, 'learning_rate': 4.470108695652174e-05, 'epoch': 0.10597826086956522}\n",
"{'loss': 3.1936498641967774, 'learning_rate': 4.456521739130435e-05, 'epoch': 0.10869565217391304}\n",
"{'loss': 3.0913599014282225, 'learning_rate': 4.442934782608696e-05, 'epoch': 0.11141304347826086}\n",
"{'loss': 3.075895309448242, 'learning_rate': 4.429347826086957e-05, 'epoch': 0.11413043478260869}\n",
"{'loss': 3.0935127258300783, 'learning_rate': 4.4157608695652175e-05, 'epoch': 0.11684782608695653}\n",
"{'loss': 3.05960750579834, 'learning_rate': 4.4021739130434786e-05, 'epoch': 0.11956521739130435}\n",
"{'loss': 3.026990509033203, 'learning_rate': 4.388586956521739e-05, 'epoch': 0.12228260869565218}\n",
"{'loss': 3.117096710205078, 'learning_rate': 4.375e-05, 'epoch': 0.125}\n",
"{'loss': 3.229228973388672, 'learning_rate': 4.361413043478261e-05, 'epoch': 0.12771739130434784}\n",
"{'loss': 3.178273391723633, 'learning_rate': 4.347826086956522e-05, 'epoch': 0.13043478260869565}\n",
"{'loss': 3.1459720611572264, 'learning_rate': 4.334239130434783e-05, 'epoch': 0.1331521739130435}\n",
"{'loss': 2.9881481170654296, 'learning_rate': 4.3206521739130434e-05, 'epoch': 0.1358695652173913}\n",
" 14% 1000/7360 [02:36<15:50, 6.69it/s][INFO|trainer.py:1226] 2020-12-31 11:32:38,577 >> Saving model checkpoint to output/nosleep/checkpoint-1000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:32:38,579 >> Configuration saved in output/nosleep/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:32:40,184 >> Model weights saved in output/nosleep/checkpoint-1000/pytorch_model.bin\n",
"{'loss': 3.059023857116699, 'learning_rate': 4.3070652173913045e-05, 'epoch': 0.13858695652173914}\n",
"{'loss': 3.04290657043457, 'learning_rate': 4.2934782608695655e-05, 'epoch': 0.14130434782608695}\n",
"{'loss': 2.915846824645996, 'learning_rate': 4.279891304347826e-05, 'epoch': 0.14402173913043478}\n",
"{'loss': 3.018391799926758, 'learning_rate': 4.266304347826087e-05, 'epoch': 0.14673913043478262}\n",
"{'loss': 3.1145843505859374, 'learning_rate': 4.252717391304348e-05, 'epoch': 0.14945652173913043}\n",
"{'loss': 3.124193000793457, 'learning_rate': 4.239130434782609e-05, 'epoch': 0.15217391304347827}\n",
"{'loss': 3.1068984985351564, 'learning_rate': 4.22554347826087e-05, 'epoch': 0.15489130434782608}\n",
"{'loss': 3.100969696044922, 'learning_rate': 4.2119565217391304e-05, 'epoch': 0.15760869565217392}\n",
"{'loss': 3.0650775909423826, 'learning_rate': 4.1983695652173914e-05, 'epoch': 0.16032608695652173}\n",
"{'loss': 3.067531204223633, 'learning_rate': 4.1847826086956525e-05, 'epoch': 0.16304347826086957}\n",
"{'loss': 3.1300127029418947, 'learning_rate': 4.171195652173913e-05, 'epoch': 0.16576086956521738}\n",
"{'loss': 3.0881221771240233, 'learning_rate': 4.1576086956521746e-05, 'epoch': 0.16847826086956522}\n",
"{'loss': 3.1220754623413085, 'learning_rate': 4.144021739130435e-05, 'epoch': 0.17119565217391305}\n",
"{'loss': 3.222932815551758, 'learning_rate': 4.130434782608696e-05, 'epoch': 0.17391304347826086}\n",
"{'loss': 3.0593658447265626, 'learning_rate': 4.116847826086957e-05, 'epoch': 0.1766304347826087}\n",
"{'loss': 3.018812561035156, 'learning_rate': 4.1032608695652174e-05, 'epoch': 0.1793478260869565}\n",
"{'loss': 3.0209110260009764, 'learning_rate': 4.0896739130434784e-05, 'epoch': 0.18206521739130435}\n",
"{'loss': 3.059507369995117, 'learning_rate': 4.076086956521739e-05, 'epoch': 0.18478260869565216}\n",
"{'loss': 3.0747259140014647, 'learning_rate': 4.0625000000000005e-05, 'epoch': 0.1875}\n",
"{'loss': 3.0447511672973633, 'learning_rate': 4.0489130434782615e-05, 'epoch': 0.19021739130434784}\n",
"{'loss': 3.0659013748168946, 'learning_rate': 4.035326086956522e-05, 'epoch': 0.19293478260869565}\n",
"{'loss': 2.887067413330078, 'learning_rate': 4.021739130434783e-05, 'epoch': 0.1956521739130435}\n",
"{'loss': 3.0112159729003904, 'learning_rate': 4.008152173913043e-05, 'epoch': 0.1983695652173913}\n",
"{'loss': 3.152726936340332, 'learning_rate': 3.994565217391304e-05, 'epoch': 0.20108695652173914}\n",
"{'loss': 3.0397472381591797, 'learning_rate': 3.9809782608695654e-05, 'epoch': 0.20380434782608695}\n",
" 20% 1500/7360 [03:58<14:44, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:34:00,695 >> Saving model checkpoint to output/nosleep/checkpoint-1500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:34:00,697 >> Configuration saved in output/nosleep/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:34:02,420 >> Model weights saved in output/nosleep/checkpoint-1500/pytorch_model.bin\n",
"{'loss': 2.997607612609863, 'learning_rate': 3.9673913043478264e-05, 'epoch': 0.20652173913043478}\n",
"{'loss': 3.096424865722656, 'learning_rate': 3.9538043478260875e-05, 'epoch': 0.20923913043478262}\n",
"{'loss': 3.0459573745727537, 'learning_rate': 3.940217391304348e-05, 'epoch': 0.21195652173913043}\n",
"{'loss': 3.046352577209473, 'learning_rate': 3.926630434782609e-05, 'epoch': 0.21467391304347827}\n",
"{'loss': 3.054677391052246, 'learning_rate': 3.91304347826087e-05, 'epoch': 0.21739130434782608}\n",
"{'loss': 2.96755428314209, 'learning_rate': 3.89945652173913e-05, 'epoch': 0.22010869565217392}\n",
"{'loss': 3.183602714538574, 'learning_rate': 3.885869565217391e-05, 'epoch': 0.22282608695652173}\n",
"{'loss': 3.0821224212646485, 'learning_rate': 3.8722826086956523e-05, 'epoch': 0.22554347826086957}\n",
"{'loss': 3.0650859832763673, 'learning_rate': 3.8586956521739134e-05, 'epoch': 0.22826086956521738}\n",
"{'loss': 3.0474729537963867, 'learning_rate': 3.8451086956521744e-05, 'epoch': 0.23097826086956522}\n",
"{'loss': 2.991226387023926, 'learning_rate': 3.831521739130435e-05, 'epoch': 0.23369565217391305}\n",
"{'loss': 3.0636838912963866, 'learning_rate': 3.817934782608696e-05, 'epoch': 0.23641304347826086}\n",
"{'loss': 3.107697296142578, 'learning_rate': 3.804347826086957e-05, 'epoch': 0.2391304347826087}\n",
"{'loss': 3.0795093536376954, 'learning_rate': 3.790760869565217e-05, 'epoch': 0.2418478260869565}\n",
"{'loss': 3.0636316299438477, 'learning_rate': 3.777173913043478e-05, 'epoch': 0.24456521739130435}\n",
"{'loss': 3.1231103897094727, 'learning_rate': 3.763586956521739e-05, 'epoch': 0.24728260869565216}\n",
"{'loss': 3.0773599624633787, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.25}\n",
"{'loss': 3.0005971908569338, 'learning_rate': 3.7364130434782614e-05, 'epoch': 0.25271739130434784}\n",
"{'loss': 3.0077032089233398, 'learning_rate': 3.722826086956522e-05, 'epoch': 0.2554347826086957}\n",
"{'loss': 3.0258026123046875, 'learning_rate': 3.709239130434783e-05, 'epoch': 0.25815217391304346}\n",
"{'loss': 3.10824031829834, 'learning_rate': 3.695652173913043e-05, 'epoch': 0.2608695652173913}\n",
"{'loss': 3.023676872253418, 'learning_rate': 3.682065217391304e-05, 'epoch': 0.26358695652173914}\n",
"{'loss': 2.993247222900391, 'learning_rate': 3.668478260869566e-05, 'epoch': 0.266304347826087}\n",
"{'loss': 3.052256774902344, 'learning_rate': 3.654891304347826e-05, 'epoch': 0.26902173913043476}\n",
"{'loss': 3.0760807037353515, 'learning_rate': 3.641304347826087e-05, 'epoch': 0.2717391304347826}\n",
" 27% 2000/7360 [05:20<13:46, 6.49it/s][INFO|trainer.py:1226] 2020-12-31 11:35:22,941 >> Saving model checkpoint to output/nosleep/checkpoint-2000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:35:22,943 >> Configuration saved in output/nosleep/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:35:24,589 >> Model weights saved in output/nosleep/checkpoint-2000/pytorch_model.bin\n",
"{'loss': 3.013957214355469, 'learning_rate': 3.627717391304348e-05, 'epoch': 0.27445652173913043}\n",
"{'loss': 3.1673465728759767, 'learning_rate': 3.614130434782609e-05, 'epoch': 0.27717391304347827}\n",
"{'loss': 3.0540140151977537, 'learning_rate': 3.60054347826087e-05, 'epoch': 0.2798913043478261}\n",
"{'loss': 3.0425041198730467, 'learning_rate': 3.58695652173913e-05, 'epoch': 0.2826086956521739}\n",
"{'loss': 3.078123664855957, 'learning_rate': 3.573369565217392e-05, 'epoch': 0.28532608695652173}\n",
"{'loss': 3.130359649658203, 'learning_rate': 3.559782608695653e-05, 'epoch': 0.28804347826086957}\n",
"{'loss': 3.161348342895508, 'learning_rate': 3.546195652173913e-05, 'epoch': 0.2907608695652174}\n",
"{'loss': 2.9576461791992186, 'learning_rate': 3.532608695652174e-05, 'epoch': 0.29347826086956524}\n",
"{'loss': 3.111285400390625, 'learning_rate': 3.5190217391304346e-05, 'epoch': 0.296195652173913}\n",
"{'loss': 3.0396650314331053, 'learning_rate': 3.505434782608696e-05, 'epoch': 0.29891304347826086}\n",
"{'loss': 3.071572685241699, 'learning_rate': 3.491847826086957e-05, 'epoch': 0.3016304347826087}\n",
"{'loss': 3.0055158615112303, 'learning_rate': 3.478260869565218e-05, 'epoch': 0.30434782608695654}\n",
"{'loss': 2.9725093841552734, 'learning_rate': 3.464673913043479e-05, 'epoch': 0.3070652173913043}\n",
"{'loss': 2.9921489715576173, 'learning_rate': 3.451086956521739e-05, 'epoch': 0.30978260869565216}\n",
"{'loss': 3.0546697616577148, 'learning_rate': 3.4375e-05, 'epoch': 0.3125}\n",
"{'loss': 2.9978811264038088, 'learning_rate': 3.423913043478261e-05, 'epoch': 0.31521739130434784}\n",
"{'loss': 3.1139686584472654, 'learning_rate': 3.4103260869565216e-05, 'epoch': 0.3179347826086957}\n",
"{'loss': 3.1690120697021484, 'learning_rate': 3.3967391304347826e-05, 'epoch': 0.32065217391304346}\n",
"{'loss': 2.947348213195801, 'learning_rate': 3.383152173913044e-05, 'epoch': 0.3233695652173913}\n",
"{'loss': 3.0949867248535154, 'learning_rate': 3.369565217391305e-05, 'epoch': 0.32608695652173914}\n",
"{'loss': 2.9547935485839845, 'learning_rate': 3.355978260869566e-05, 'epoch': 0.328804347826087}\n",
"{'loss': 2.8996337890625, 'learning_rate': 3.342391304347826e-05, 'epoch': 0.33152173913043476}\n",
"{'loss': 2.9912178039550783, 'learning_rate': 3.328804347826087e-05, 'epoch': 0.3342391304347826}\n",
"{'loss': 3.1216081619262694, 'learning_rate': 3.3152173913043475e-05, 'epoch': 0.33695652173913043}\n",
"{'loss': 2.9457778930664062, 'learning_rate': 3.3016304347826086e-05, 'epoch': 0.33967391304347827}\n",
" 34% 2500/7360 [06:41<12:14, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:36:44,059 >> Saving model checkpoint to output/nosleep/checkpoint-2500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:36:44,060 >> Configuration saved in output/nosleep/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:36:45,639 >> Model weights saved in output/nosleep/checkpoint-2500/pytorch_model.bin\n",
"{'loss': 3.0662458419799803, 'learning_rate': 3.2880434782608696e-05, 'epoch': 0.3423913043478261}\n",
"{'loss': 2.989406967163086, 'learning_rate': 3.2744565217391307e-05, 'epoch': 0.3451086956521739}\n",
"{'loss': 3.053440475463867, 'learning_rate': 3.260869565217392e-05, 'epoch': 0.34782608695652173}\n",
"{'loss': 2.9125459671020506, 'learning_rate': 3.247282608695653e-05, 'epoch': 0.35054347826086957}\n",
"{'loss': 3.0044492721557616, 'learning_rate': 3.233695652173913e-05, 'epoch': 0.3532608695652174}\n",
"{'loss': 2.9832174301147463, 'learning_rate': 3.220108695652174e-05, 'epoch': 0.35597826086956524}\n",
"{'loss': 3.0895017623901366, 'learning_rate': 3.2065217391304345e-05, 'epoch': 0.358695652173913}\n",
"{'loss': 3.051135444641113, 'learning_rate': 3.1929347826086955e-05, 'epoch': 0.36141304347826086}\n",
"{'loss': 3.0140249252319338, 'learning_rate': 3.1793478260869566e-05, 'epoch': 0.3641304347826087}\n",
"{'loss': 2.9950437545776367, 'learning_rate': 3.1657608695652176e-05, 'epoch': 0.36684782608695654}\n",
"{'loss': 3.102407455444336, 'learning_rate': 3.152173913043479e-05, 'epoch': 0.3695652173913043}\n",
"{'loss': 3.0337915420532227, 'learning_rate': 3.138586956521739e-05, 'epoch': 0.37228260869565216}\n",
"{'loss': 3.0124744415283202, 'learning_rate': 3.125e-05, 'epoch': 0.375}\n",
"{'loss': 2.9697227478027344, 'learning_rate': 3.111413043478261e-05, 'epoch': 0.37771739130434784}\n",
"{'loss': 2.954401397705078, 'learning_rate': 3.0978260869565215e-05, 'epoch': 0.3804347826086957}\n",
"{'loss': 3.0465015411376952, 'learning_rate': 3.084239130434783e-05, 'epoch': 0.38315217391304346}\n",
"{'loss': 2.9104570388793944, 'learning_rate': 3.0706521739130435e-05, 'epoch': 0.3858695652173913}\n",
"{'loss': 2.96370792388916, 'learning_rate': 3.0570652173913046e-05, 'epoch': 0.38858695652173914}\n",
"{'loss': 3.0235336303710936, 'learning_rate': 3.0434782608695656e-05, 'epoch': 0.391304347826087}\n",
"{'loss': 2.9881025314331056, 'learning_rate': 3.029891304347826e-05, 'epoch': 0.39402173913043476}\n",
"{'loss': 2.9734582901000977, 'learning_rate': 3.016304347826087e-05, 'epoch': 0.3967391304347826}\n",
"{'loss': 3.036972427368164, 'learning_rate': 3.0027173913043477e-05, 'epoch': 0.39945652173913043}\n",
"{'loss': 3.0409944534301756, 'learning_rate': 2.9891304347826088e-05, 'epoch': 0.40217391304347827}\n",
"{'loss': 2.9913082122802734, 'learning_rate': 2.9755434782608698e-05, 'epoch': 0.4048913043478261}\n",
"{'loss': 2.992050552368164, 'learning_rate': 2.9619565217391305e-05, 'epoch': 0.4076086956521739}\n",
" 41% 3000/7360 [08:02<10:53, 6.67it/s][INFO|trainer.py:1226] 2020-12-31 11:38:05,098 >> Saving model checkpoint to output/nosleep/checkpoint-3000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:38:05,100 >> Configuration saved in output/nosleep/checkpoint-3000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:38:06,790 >> Model weights saved in output/nosleep/checkpoint-3000/pytorch_model.bin\n",
"{'loss': 3.023343086242676, 'learning_rate': 2.9483695652173916e-05, 'epoch': 0.41032608695652173}\n",
"{'loss': 2.97615966796875, 'learning_rate': 2.9347826086956526e-05, 'epoch': 0.41304347826086957}\n",
"{'loss': 3.0653865814208983, 'learning_rate': 2.921195652173913e-05, 'epoch': 0.4157608695652174}\n",
"{'loss': 3.013285255432129, 'learning_rate': 2.9076086956521743e-05, 'epoch': 0.41847826086956524}\n",
"{'loss': 3.074570083618164, 'learning_rate': 2.8940217391304347e-05, 'epoch': 0.421195652173913}\n",
"{'loss': 2.9360151290893555, 'learning_rate': 2.8804347826086957e-05, 'epoch': 0.42391304347826086}\n",
"{'loss': 3.05234375, 'learning_rate': 2.8668478260869568e-05, 'epoch': 0.4266304347826087}\n",
"{'loss': 2.893931579589844, 'learning_rate': 2.8532608695652175e-05, 'epoch': 0.42934782608695654}\n",
"{'loss': 2.9600061416625976, 'learning_rate': 2.8396739130434785e-05, 'epoch': 0.4320652173913043}\n",
"{'loss': 2.9860197067260743, 'learning_rate': 2.826086956521739e-05, 'epoch': 0.43478260869565216}\n",
"{'loss': 3.013777160644531, 'learning_rate': 2.8125000000000003e-05, 'epoch': 0.4375}\n",
"{'loss': 2.9207672119140624, 'learning_rate': 2.7989130434782613e-05, 'epoch': 0.44021739130434784}\n",
"{'loss': 2.9867345809936525, 'learning_rate': 2.7853260869565217e-05, 'epoch': 0.4429347826086957}\n",
"{'loss': 2.9700445175170898, 'learning_rate': 2.7717391304347827e-05, 'epoch': 0.44565217391304346}\n",
"{'loss': 3.021417999267578, 'learning_rate': 2.7581521739130434e-05, 'epoch': 0.4483695652173913}\n",
"{'loss': 3.0516029357910157, 'learning_rate': 2.7445652173913044e-05, 'epoch': 0.45108695652173914}\n",
"{'loss': 2.995887565612793, 'learning_rate': 2.7309782608695655e-05, 'epoch': 0.453804347826087}\n",
"{'loss': 2.996470260620117, 'learning_rate': 2.7173913043478262e-05, 'epoch': 0.45652173913043476}\n",
"{'loss': 2.8460014343261717, 'learning_rate': 2.7038043478260872e-05, 'epoch': 0.4592391304347826}\n",
"{'loss': 3.0168664932250975, 'learning_rate': 2.6902173913043476e-05, 'epoch': 0.46195652173913043}\n",
"{'loss': 2.973033905029297, 'learning_rate': 2.6766304347826086e-05, 'epoch': 0.46467391304347827}\n",
"{'loss': 2.910268211364746, 'learning_rate': 2.66304347826087e-05, 'epoch': 0.4673913043478261}\n",
"{'loss': 2.9323522567749025, 'learning_rate': 2.6494565217391304e-05, 'epoch': 0.4701086956521739}\n",
"{'loss': 3.0819915771484374, 'learning_rate': 2.6358695652173914e-05, 'epoch': 0.47282608695652173}\n",
"{'loss': 3.013107681274414, 'learning_rate': 2.6222826086956525e-05, 'epoch': 0.47554347826086957}\n",
" 48% 3500/7360 [09:24<09:46, 6.59it/s][INFO|trainer.py:1226] 2020-12-31 11:39:26,361 >> Saving model checkpoint to output/nosleep/checkpoint-3500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:39:26,362 >> Configuration saved in output/nosleep/checkpoint-3500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:39:28,029 >> Model weights saved in output/nosleep/checkpoint-3500/pytorch_model.bin\n",
"{'loss': 2.909230422973633, 'learning_rate': 2.608695652173913e-05, 'epoch': 0.4782608695652174}\n",
"{'loss': 3.0446781158447265, 'learning_rate': 2.5951086956521742e-05, 'epoch': 0.48097826086956524}\n",
"{'loss': 3.002200126647949, 'learning_rate': 2.5815217391304346e-05, 'epoch': 0.483695652173913}\n",
"{'loss': 3.04788875579834, 'learning_rate': 2.567934782608696e-05, 'epoch': 0.48641304347826086}\n",
"{'loss': 2.780653381347656, 'learning_rate': 2.554347826086957e-05, 'epoch': 0.4891304347826087}\n",
"{'loss': 2.972218704223633, 'learning_rate': 2.5407608695652173e-05, 'epoch': 0.49184782608695654}\n",
"{'loss': 3.0140708923339843, 'learning_rate': 2.5271739130434784e-05, 'epoch': 0.4945652173913043}\n",
"{'loss': 2.8474166870117186, 'learning_rate': 2.513586956521739e-05, 'epoch': 0.49728260869565216}\n",
"{'loss': 3.0726463317871096, 'learning_rate': 2.5e-05, 'epoch': 0.5}\n",
"{'loss': 3.059174346923828, 'learning_rate': 2.4864130434782608e-05, 'epoch': 0.5027173913043478}\n",
"{'loss': 2.915945816040039, 'learning_rate': 2.472826086956522e-05, 'epoch': 0.5054347826086957}\n",
"{'loss': 2.938085746765137, 'learning_rate': 2.459239130434783e-05, 'epoch': 0.5081521739130435}\n",
"{'loss': 2.9889217376708985, 'learning_rate': 2.4456521739130436e-05, 'epoch': 0.5108695652173914}\n",
"{'loss': 3.0353281021118166, 'learning_rate': 2.4320652173913043e-05, 'epoch': 0.5135869565217391}\n",
"{'loss': 3.0933725357055666, 'learning_rate': 2.4184782608695653e-05, 'epoch': 0.5163043478260869}\n",
"{'loss': 2.8329307556152346, 'learning_rate': 2.4048913043478264e-05, 'epoch': 0.5190217391304348}\n",
"{'loss': 2.938851165771484, 'learning_rate': 2.391304347826087e-05, 'epoch': 0.5217391304347826}\n",
"{'loss': 3.106460762023926, 'learning_rate': 2.3777173913043478e-05, 'epoch': 0.5244565217391305}\n",
"{'loss': 2.889130401611328, 'learning_rate': 2.3641304347826088e-05, 'epoch': 0.5271739130434783}\n",
"{'loss': 2.952214241027832, 'learning_rate': 2.3505434782608695e-05, 'epoch': 0.529891304347826}\n",
"{'loss': 3.0268835067749023, 'learning_rate': 2.3369565217391306e-05, 'epoch': 0.532608695652174}\n",
"{'loss': 2.8543680191040037, 'learning_rate': 2.3233695652173916e-05, 'epoch': 0.5353260869565217}\n",
"{'loss': 2.91422176361084, 'learning_rate': 2.3097826086956523e-05, 'epoch': 0.5380434782608695}\n",
"{'loss': 2.9302282333374023, 'learning_rate': 2.296195652173913e-05, 'epoch': 0.5407608695652174}\n",
"{'loss': 2.9321842193603516, 'learning_rate': 2.282608695652174e-05, 'epoch': 0.5434782608695652}\n",
" 54% 4000/7360 [10:45<08:21, 6.70it/s][INFO|trainer.py:1226] 2020-12-31 11:40:47,338 >> Saving model checkpoint to output/nosleep/checkpoint-4000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:40:47,340 >> Configuration saved in output/nosleep/checkpoint-4000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:40:48,945 >> Model weights saved in output/nosleep/checkpoint-4000/pytorch_model.bin\n",
"{'loss': 2.8125045776367186, 'learning_rate': 2.269021739130435e-05, 'epoch': 0.5461956521739131}\n",
"{'loss': 2.896123695373535, 'learning_rate': 2.2554347826086958e-05, 'epoch': 0.5489130434782609}\n",
"{'loss': 2.864361381530762, 'learning_rate': 2.2418478260869565e-05, 'epoch': 0.5516304347826086}\n",
"{'loss': 2.89820556640625, 'learning_rate': 2.2282608695652175e-05, 'epoch': 0.5543478260869565}\n",
"{'loss': 2.921078109741211, 'learning_rate': 2.2146739130434786e-05, 'epoch': 0.5570652173913043}\n",
"{'loss': 2.879536247253418, 'learning_rate': 2.2010869565217393e-05, 'epoch': 0.5597826086956522}\n",
"{'loss': 3.100284194946289, 'learning_rate': 2.1875e-05, 'epoch': 0.5625}\n",
"{'loss': 2.955865669250488, 'learning_rate': 2.173913043478261e-05, 'epoch': 0.5652173913043478}\n",
"{'loss': 2.966480827331543, 'learning_rate': 2.1603260869565217e-05, 'epoch': 0.5679347826086957}\n",
"{'loss': 2.9319448471069336, 'learning_rate': 2.1467391304347828e-05, 'epoch': 0.5706521739130435}\n",
"{'loss': 2.9581552505493165, 'learning_rate': 2.1331521739130435e-05, 'epoch': 0.5733695652173914}\n",
"{'loss': 2.8463247299194334, 'learning_rate': 2.1195652173913045e-05, 'epoch': 0.5760869565217391}\n",
"{'loss': 3.006674575805664, 'learning_rate': 2.1059782608695652e-05, 'epoch': 0.5788043478260869}\n",
"{'loss': 2.8334024429321287, 'learning_rate': 2.0923913043478262e-05, 'epoch': 0.5815217391304348}\n",
"{'loss': 2.8948385238647463, 'learning_rate': 2.0788043478260873e-05, 'epoch': 0.5842391304347826}\n",
"{'loss': 2.8607654571533203, 'learning_rate': 2.065217391304348e-05, 'epoch': 0.5869565217391305}\n",
"{'loss': 2.9063913345336916, 'learning_rate': 2.0516304347826087e-05, 'epoch': 0.5896739130434783}\n",
"{'loss': 2.932078170776367, 'learning_rate': 2.0380434782608694e-05, 'epoch': 0.592391304347826}\n",
"{'loss': 2.856118392944336, 'learning_rate': 2.0244565217391308e-05, 'epoch': 0.595108695652174}\n",
"{'loss': 2.9656396865844727, 'learning_rate': 2.0108695652173915e-05, 'epoch': 0.5978260869565217}\n",
"{'loss': 2.890290451049805, 'learning_rate': 1.997282608695652e-05, 'epoch': 0.6005434782608695}\n",
"{'loss': 3.000225639343262, 'learning_rate': 1.9836956521739132e-05, 'epoch': 0.6032608695652174}\n",
"{'loss': 3.014234733581543, 'learning_rate': 1.970108695652174e-05, 'epoch': 0.6059782608695652}\n",
"{'loss': 2.9607624053955077, 'learning_rate': 1.956521739130435e-05, 'epoch': 0.6086956521739131}\n",
"{'loss': 2.9253002166748048, 'learning_rate': 1.9429347826086957e-05, 'epoch': 0.6114130434782609}\n",
" 61% 4500/7360 [12:06<07:13, 6.59it/s][INFO|trainer.py:1226] 2020-12-31 11:42:08,834 >> Saving model checkpoint to output/nosleep/checkpoint-4500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:42:08,835 >> Configuration saved in output/nosleep/checkpoint-4500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:42:10,483 >> Model weights saved in output/nosleep/checkpoint-4500/pytorch_model.bin\n",
"{'loss': 2.8905784606933596, 'learning_rate': 1.9293478260869567e-05, 'epoch': 0.6141304347826086}\n",
"{'loss': 2.9393293380737306, 'learning_rate': 1.9157608695652174e-05, 'epoch': 0.6168478260869565}\n",
"{'loss': 2.853158378601074, 'learning_rate': 1.9021739130434784e-05, 'epoch': 0.6195652173913043}\n",
"{'loss': 2.912156677246094, 'learning_rate': 1.888586956521739e-05, 'epoch': 0.6222826086956522}\n",
"{'loss': 2.991878318786621, 'learning_rate': 1.8750000000000002e-05, 'epoch': 0.625}\n",
"{'loss': 2.8975969314575196, 'learning_rate': 1.861413043478261e-05, 'epoch': 0.6277173913043478}\n",
"{'loss': 2.926823616027832, 'learning_rate': 1.8478260869565216e-05, 'epoch': 0.6304347826086957}\n",
"{'loss': 2.9280727386474608, 'learning_rate': 1.834239130434783e-05, 'epoch': 0.6331521739130435}\n",
"{'loss': 2.8288547515869142, 'learning_rate': 1.8206521739130437e-05, 'epoch': 0.6358695652173914}\n",
"{'loss': 2.947658348083496, 'learning_rate': 1.8070652173913044e-05, 'epoch': 0.6385869565217391}\n",
"{'loss': 2.917711639404297, 'learning_rate': 1.793478260869565e-05, 'epoch': 0.6413043478260869}\n",
"{'loss': 3.0101985931396484, 'learning_rate': 1.7798913043478264e-05, 'epoch': 0.6440217391304348}\n",
"{'loss': 2.986308288574219, 'learning_rate': 1.766304347826087e-05, 'epoch': 0.6467391304347826}\n",
"{'loss': 2.9199193954467773, 'learning_rate': 1.752717391304348e-05, 'epoch': 0.6494565217391305}\n",
"{'loss': 2.920822525024414, 'learning_rate': 1.739130434782609e-05, 'epoch': 0.6521739130434783}\n",
"{'loss': 2.9012218475341798, 'learning_rate': 1.7255434782608696e-05, 'epoch': 0.654891304347826}\n",
"{'loss': 2.9841341018676757, 'learning_rate': 1.7119565217391306e-05, 'epoch': 0.657608695652174}\n",
"{'loss': 2.8883106231689455, 'learning_rate': 1.6983695652173913e-05, 'epoch': 0.6603260869565217}\n",
"{'loss': 2.9954706192016602, 'learning_rate': 1.6847826086956524e-05, 'epoch': 0.6630434782608695}\n",
"{'loss': 3.049188995361328, 'learning_rate': 1.671195652173913e-05, 'epoch': 0.6657608695652174}\n",
"{'loss': 2.8762126922607423, 'learning_rate': 1.6576086956521738e-05, 'epoch': 0.6684782608695652}\n",
"{'loss': 2.901471710205078, 'learning_rate': 1.6440217391304348e-05, 'epoch': 0.6711956521739131}\n",
"{'loss': 2.7890932083129885, 'learning_rate': 1.630434782608696e-05, 'epoch': 0.6739130434782609}\n",
"{'loss': 2.9092123031616213, 'learning_rate': 1.6168478260869565e-05, 'epoch': 0.6766304347826086}\n",
"{'loss': 2.905811309814453, 'learning_rate': 1.6032608695652173e-05, 'epoch': 0.6793478260869565}\n",
" 68% 5000/7360 [13:27<05:53, 6.68it/s][INFO|trainer.py:1226] 2020-12-31 11:43:30,013 >> Saving model checkpoint to output/nosleep/checkpoint-5000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:43:30,014 >> Configuration saved in output/nosleep/checkpoint-5000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:43:31,742 >> Model weights saved in output/nosleep/checkpoint-5000/pytorch_model.bin\n",
"{'loss': 3.02506046295166, 'learning_rate': 1.5896739130434783e-05, 'epoch': 0.6820652173913043}\n",
"{'loss': 2.952139663696289, 'learning_rate': 1.5760869565217393e-05, 'epoch': 0.6847826086956522}\n",
"{'loss': 2.9070606231689453, 'learning_rate': 1.5625e-05, 'epoch': 0.6875}\n",
"{'loss': 2.934033966064453, 'learning_rate': 1.5489130434782607e-05, 'epoch': 0.6902173913043478}\n",
"{'loss': 2.973927688598633, 'learning_rate': 1.5353260869565218e-05, 'epoch': 0.6929347826086957}\n",
"{'loss': 2.9208562850952147, 'learning_rate': 1.5217391304347828e-05, 'epoch': 0.6956521739130435}\n",
"{'loss': 2.8716516494750977, 'learning_rate': 1.5081521739130435e-05, 'epoch': 0.6983695652173914}\n",
"{'loss': 3.008540916442871, 'learning_rate': 1.4945652173913044e-05, 'epoch': 0.7010869565217391}\n",
"{'loss': 2.9472280502319337, 'learning_rate': 1.4809782608695653e-05, 'epoch': 0.7038043478260869}\n",
"{'loss': 2.980273628234863, 'learning_rate': 1.4673913043478263e-05, 'epoch': 0.7065217391304348}\n",
"{'loss': 2.887953758239746, 'learning_rate': 1.4538043478260872e-05, 'epoch': 0.7092391304347826}\n",
"{'loss': 2.964910125732422, 'learning_rate': 1.4402173913043479e-05, 'epoch': 0.7119565217391305}\n",
"{'loss': 2.907693290710449, 'learning_rate': 1.4266304347826087e-05, 'epoch': 0.7146739130434783}\n",
"{'loss': 2.995789337158203, 'learning_rate': 1.4130434782608694e-05, 'epoch': 0.717391304347826}\n",
"{'loss': 2.919285202026367, 'learning_rate': 1.3994565217391307e-05, 'epoch': 0.720108695652174}\n",
"{'loss': 2.891135025024414, 'learning_rate': 1.3858695652173914e-05, 'epoch': 0.7228260869565217}\n",
"{'loss': 2.890410041809082, 'learning_rate': 1.3722826086956522e-05, 'epoch': 0.7255434782608695}\n",
"{'loss': 2.884817695617676, 'learning_rate': 1.3586956521739131e-05, 'epoch': 0.7282608695652174}\n",
"{'loss': 2.9657230377197266, 'learning_rate': 1.3451086956521738e-05, 'epoch': 0.7309782608695652}\n",
"{'loss': 2.885117530822754, 'learning_rate': 1.331521739130435e-05, 'epoch': 0.7336956521739131}\n",
"{'loss': 2.961221694946289, 'learning_rate': 1.3179347826086957e-05, 'epoch': 0.7364130434782609}\n",
"{'loss': 2.890369415283203, 'learning_rate': 1.3043478260869566e-05, 'epoch': 0.7391304347826086}\n",
"{'loss': 2.9881954193115234, 'learning_rate': 1.2907608695652173e-05, 'epoch': 0.7418478260869565}\n",
"{'loss': 2.870479774475098, 'learning_rate': 1.2771739130434785e-05, 'epoch': 0.7445652173913043}\n",
"{'loss': 2.900164794921875, 'learning_rate': 1.2635869565217392e-05, 'epoch': 0.7472826086956522}\n",
" 75% 5500/7360 [14:48<04:37, 6.71it/s][INFO|trainer.py:1226] 2020-12-31 11:44:51,058 >> Saving model checkpoint to output/nosleep/checkpoint-5500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:44:51,059 >> Configuration saved in output/nosleep/checkpoint-5500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:44:52,709 >> Model weights saved in output/nosleep/checkpoint-5500/pytorch_model.bin\n",
"{'loss': 2.874920654296875, 'learning_rate': 1.25e-05, 'epoch': 0.75}\n",
"{'loss': 2.9421880722045897, 'learning_rate': 1.236413043478261e-05, 'epoch': 0.7527173913043478}\n",
"{'loss': 2.9182043075561523, 'learning_rate': 1.2228260869565218e-05, 'epoch': 0.7554347826086957}\n",
"{'loss': 2.853078842163086, 'learning_rate': 1.2092391304347827e-05, 'epoch': 0.7581521739130435}\n",
"{'loss': 2.754827880859375, 'learning_rate': 1.1956521739130435e-05, 'epoch': 0.7608695652173914}\n",
"{'loss': 3.0058160781860352, 'learning_rate': 1.1820652173913044e-05, 'epoch': 0.7635869565217391}\n",
"{'loss': 2.938898468017578, 'learning_rate': 1.1684782608695653e-05, 'epoch': 0.7663043478260869}\n",
"{'loss': 2.9451087951660155, 'learning_rate': 1.1548913043478262e-05, 'epoch': 0.7690217391304348}\n",
"{'loss': 2.832347869873047, 'learning_rate': 1.141304347826087e-05, 'epoch': 0.7717391304347826}\n",
"{'loss': 2.9098974227905274, 'learning_rate': 1.1277173913043479e-05, 'epoch': 0.7744565217391305}\n",
"{'loss': 2.992401695251465, 'learning_rate': 1.1141304347826088e-05, 'epoch': 0.7771739130434783}\n",
"{'loss': 2.7906036376953125, 'learning_rate': 1.1005434782608696e-05, 'epoch': 0.779891304347826}\n",
"{'loss': 2.887853240966797, 'learning_rate': 1.0869565217391305e-05, 'epoch': 0.782608695652174}\n",
"{'loss': 2.8920427322387696, 'learning_rate': 1.0733695652173914e-05, 'epoch': 0.7853260869565217}\n",
"{'loss': 2.8993629455566405, 'learning_rate': 1.0597826086956523e-05, 'epoch': 0.7880434782608695}\n",
"{'loss': 2.9929531097412108, 'learning_rate': 1.0461956521739131e-05, 'epoch': 0.7907608695652174}\n",
"{'loss': 2.79913272857666, 'learning_rate': 1.032608695652174e-05, 'epoch': 0.7934782608695652}\n",
"{'loss': 2.822602462768555, 'learning_rate': 1.0190217391304347e-05, 'epoch': 0.7961956521739131}\n",
"{'loss': 2.8938386917114256, 'learning_rate': 1.0054347826086957e-05, 'epoch': 0.7989130434782609}\n",
"{'loss': 2.9126588821411135, 'learning_rate': 9.918478260869566e-06, 'epoch': 0.8016304347826086}\n",
"{'loss': 2.9466136932373046, 'learning_rate': 9.782608695652175e-06, 'epoch': 0.8043478260869565}\n",
"{'loss': 2.766586685180664, 'learning_rate': 9.646739130434783e-06, 'epoch': 0.8070652173913043}\n",
"{'loss': 2.846967315673828, 'learning_rate': 9.510869565217392e-06, 'epoch': 0.8097826086956522}\n",
"{'loss': 2.7293128967285156, 'learning_rate': 9.375000000000001e-06, 'epoch': 0.8125}\n",
"{'loss': 2.959208297729492, 'learning_rate': 9.239130434782608e-06, 'epoch': 0.8152173913043478}\n",
" 82% 6000/7360 [16:09<03:21, 6.74it/s][INFO|trainer.py:1226] 2020-12-31 11:46:12,110 >> Saving model checkpoint to output/nosleep/checkpoint-6000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:46:12,111 >> Configuration saved in output/nosleep/checkpoint-6000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:46:13,862 >> Model weights saved in output/nosleep/checkpoint-6000/pytorch_model.bin\n",
"{'loss': 2.9754858016967773, 'learning_rate': 9.103260869565218e-06, 'epoch': 0.8179347826086957}\n",
"{'loss': 2.885609817504883, 'learning_rate': 8.967391304347825e-06, 'epoch': 0.8206521739130435}\n",
"{'loss': 2.9586454391479493, 'learning_rate': 8.831521739130436e-06, 'epoch': 0.8233695652173914}\n",
"{'loss': 2.8784345626831054, 'learning_rate': 8.695652173913044e-06, 'epoch': 0.8260869565217391}\n",
"{'loss': 2.879836654663086, 'learning_rate': 8.559782608695653e-06, 'epoch': 0.8288043478260869}\n",
"{'loss': 2.8585273742675783, 'learning_rate': 8.423913043478262e-06, 'epoch': 0.8315217391304348}\n",
"{'loss': 2.9230180740356446, 'learning_rate': 8.288043478260869e-06, 'epoch': 0.8342391304347826}\n",
"{'loss': 2.80902042388916, 'learning_rate': 8.15217391304348e-06, 'epoch': 0.8369565217391305}\n",
"{'loss': 2.821381378173828, 'learning_rate': 8.016304347826086e-06, 'epoch': 0.8396739130434783}\n",
"{'loss': 2.922434616088867, 'learning_rate': 7.880434782608697e-06, 'epoch': 0.842391304347826}\n",
"{'loss': 2.908469390869141, 'learning_rate': 7.744565217391304e-06, 'epoch': 0.845108695652174}\n",
"{'loss': 3.046040153503418, 'learning_rate': 7.608695652173914e-06, 'epoch': 0.8478260869565217}\n",
"{'loss': 2.9647945404052733, 'learning_rate': 7.472826086956522e-06, 'epoch': 0.8505434782608695}\n",
"{'loss': 2.9177480697631837, 'learning_rate': 7.3369565217391315e-06, 'epoch': 0.8532608695652174}\n",
"{'loss': 2.9124732971191407, 'learning_rate': 7.201086956521739e-06, 'epoch': 0.8559782608695652}\n",
"{'loss': 2.8802576065063477, 'learning_rate': 7.065217391304347e-06, 'epoch': 0.8586956521739131}\n",
"{'loss': 2.820792579650879, 'learning_rate': 6.929347826086957e-06, 'epoch': 0.8614130434782609}\n",
"{'loss': 2.8953441619873046, 'learning_rate': 6.7934782608695655e-06, 'epoch': 0.8641304347826086}\n",
"{'loss': 2.8939170837402344, 'learning_rate': 6.657608695652175e-06, 'epoch': 0.8668478260869565}\n",
"{'loss': 2.8508325576782227, 'learning_rate': 6.521739130434783e-06, 'epoch': 0.8695652173913043}\n",
"{'loss': 2.891870307922363, 'learning_rate': 6.3858695652173924e-06, 'epoch': 0.8722826086956522}\n",
"{'loss': 2.8636974334716796, 'learning_rate': 6.25e-06, 'epoch': 0.875}\n",
"{'loss': 2.89998779296875, 'learning_rate': 6.114130434782609e-06, 'epoch': 0.8777173913043478}\n",
"{'loss': 2.8457530975341796, 'learning_rate': 5.978260869565218e-06, 'epoch': 0.8804347826086957}\n",
"{'loss': 2.912706756591797, 'learning_rate': 5.842391304347826e-06, 'epoch': 0.8831521739130435}\n",
" 88% 6500/7360 [17:30<02:09, 6.66it/s][INFO|trainer.py:1226] 2020-12-31 11:47:33,211 >> Saving model checkpoint to output/nosleep/checkpoint-6500\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:47:33,213 >> Configuration saved in output/nosleep/checkpoint-6500/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:47:34,988 >> Model weights saved in output/nosleep/checkpoint-6500/pytorch_model.bin\n",
"{'loss': 2.898715782165527, 'learning_rate': 5.706521739130435e-06, 'epoch': 0.8858695652173914}\n",
"{'loss': 2.9165903091430665, 'learning_rate': 5.570652173913044e-06, 'epoch': 0.8885869565217391}\n",
"{'loss': 2.7092910766601563, 'learning_rate': 5.4347826086956525e-06, 'epoch': 0.8913043478260869}\n",
"{'loss': 2.830535888671875, 'learning_rate': 5.298913043478261e-06, 'epoch': 0.8940217391304348}\n",
"{'loss': 2.9604679107666017, 'learning_rate': 5.16304347826087e-06, 'epoch': 0.8967391304347826}\n",
"{'loss': 2.933907699584961, 'learning_rate': 5.027173913043479e-06, 'epoch': 0.8994565217391305}\n",
"{'loss': 2.8418628692626955, 'learning_rate': 4.891304347826087e-06, 'epoch': 0.9021739130434783}\n",
"{'loss': 2.844557189941406, 'learning_rate': 4.755434782608696e-06, 'epoch': 0.904891304347826}\n",
"{'loss': 2.9896066665649412, 'learning_rate': 4.619565217391304e-06, 'epoch': 0.907608695652174}\n",
"{'loss': 2.973470687866211, 'learning_rate': 4.483695652173913e-06, 'epoch': 0.9103260869565217}\n",
"{'loss': 2.9425249099731445, 'learning_rate': 4.347826086956522e-06, 'epoch': 0.9130434782608695}\n",
"{'loss': 2.849333381652832, 'learning_rate': 4.211956521739131e-06, 'epoch': 0.9157608695652174}\n",
"{'loss': 2.9857072830200195, 'learning_rate': 4.07608695652174e-06, 'epoch': 0.9184782608695652}\n",
"{'loss': 2.958251953125, 'learning_rate': 3.940217391304348e-06, 'epoch': 0.9211956521739131}\n",
"{'loss': 2.8145523071289062, 'learning_rate': 3.804347826086957e-06, 'epoch': 0.9239130434782609}\n",
"{'loss': 2.7729000091552733, 'learning_rate': 3.6684782608695657e-06, 'epoch': 0.9266304347826086}\n",
"{'loss': 2.9284555435180666, 'learning_rate': 3.5326086956521736e-06, 'epoch': 0.9293478260869565}\n",
"{'loss': 2.8683935165405274, 'learning_rate': 3.3967391304347827e-06, 'epoch': 0.9320652173913043}\n",
"{'loss': 2.913663864135742, 'learning_rate': 3.2608695652173914e-06, 'epoch': 0.9347826086956522}\n",
"{'loss': 2.940004348754883, 'learning_rate': 3.125e-06, 'epoch': 0.9375}\n",
"{'loss': 2.8404977798461912, 'learning_rate': 2.989130434782609e-06, 'epoch': 0.9402173913043478}\n",
"{'loss': 2.9072927474975585, 'learning_rate': 2.8532608695652176e-06, 'epoch': 0.9429347826086957}\n",
"{'loss': 2.956881523132324, 'learning_rate': 2.7173913043478263e-06, 'epoch': 0.9456521739130435}\n",
"{'loss': 2.9157276153564453, 'learning_rate': 2.581521739130435e-06, 'epoch': 0.9483695652173914}\n",
"{'loss': 2.7884153366088866, 'learning_rate': 2.4456521739130437e-06, 'epoch': 0.9510869565217391}\n",
" 95% 7000/7360 [18:51<00:54, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:48:53,869 >> Saving model checkpoint to output/nosleep/checkpoint-7000\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:48:53,871 >> Configuration saved in output/nosleep/checkpoint-7000/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:48:55,423 >> Model weights saved in output/nosleep/checkpoint-7000/pytorch_model.bin\n",
"{'loss': 2.8185762405395507, 'learning_rate': 2.309782608695652e-06, 'epoch': 0.9538043478260869}\n",
"{'loss': 2.95583553314209, 'learning_rate': 2.173913043478261e-06, 'epoch': 0.9565217391304348}\n",
"{'loss': 2.8609878540039064, 'learning_rate': 2.03804347826087e-06, 'epoch': 0.9592391304347826}\n",
"{'loss': 2.9434335708618162, 'learning_rate': 1.9021739130434785e-06, 'epoch': 0.9619565217391305}\n",
"{'loss': 2.781317138671875, 'learning_rate': 1.7663043478260868e-06, 'epoch': 0.9646739130434783}\n",
"{'loss': 2.8071691513061525, 'learning_rate': 1.6304347826086957e-06, 'epoch': 0.967391304347826}\n",
"{'loss': 2.9390436172485352, 'learning_rate': 1.4945652173913044e-06, 'epoch': 0.970108695652174}\n",
"{'loss': 2.787994384765625, 'learning_rate': 1.3586956521739131e-06, 'epoch': 0.9728260869565217}\n",
"{'loss': 2.919297790527344, 'learning_rate': 1.2228260869565218e-06, 'epoch': 0.9755434782608695}\n",
"{'loss': 2.9832035064697267, 'learning_rate': 1.0869565217391306e-06, 'epoch': 0.9782608695652174}\n",
"{'loss': 2.8524606704711912, 'learning_rate': 9.510869565217393e-07, 'epoch': 0.9809782608695652}\n",
"{'loss': 2.9475589752197267, 'learning_rate': 8.152173913043479e-07, 'epoch': 0.9836956521739131}\n",
"{'loss': 2.798427772521973, 'learning_rate': 6.793478260869566e-07, 'epoch': 0.9864130434782609}\n",
"{'loss': 2.921639823913574, 'learning_rate': 5.434782608695653e-07, 'epoch': 0.9891304347826086}\n",
"{'loss': 2.993421936035156, 'learning_rate': 4.0760869565217393e-07, 'epoch': 0.9918478260869565}\n",
"{'loss': 2.7664459228515623, 'learning_rate': 2.7173913043478264e-07, 'epoch': 0.9945652173913043}\n",
"{'loss': 2.8903715133666994, 'learning_rate': 1.3586956521739132e-07, 'epoch': 0.9972826086956522}\n",
"{'loss': 2.9647308349609376, 'learning_rate': 0.0, 'epoch': 1.0}\n",
"100% 7360/7360 [19:51<00:00, 6.65it/s][INFO|trainer.py:862] 2020-12-31 11:49:53,665 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'epoch': 1.0}\n",
"100% 7360/7360 [19:51<00:00, 6.18it/s]\n",
"[INFO|trainer.py:1226] 2020-12-31 11:49:53,713 >> Saving model checkpoint to output/nosleep\n",
"[INFO|configuration_utils.py:289] 2020-12-31 11:49:53,715 >> Configuration saved in output/nosleep/config.json\n",
"[INFO|modeling_utils.py:814] 2020-12-31 11:49:55,433 >> Model weights saved in output/nosleep/pytorch_model.bin\n",
"12/31/2020 11:49:55 - INFO - __main__ - ***** Train results *****\n",
"Traceback (most recent call last):\n",
" File \"run_clm.py\", line 385, in <module>\n",
" main()\n",
" File \"run_clm.py\", line 351, in main\n",
" for key, value in sorted(train_result.metrics.items()):\n",
"AttributeError: 'TrainOutput' object has no attribute 'metrics'\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "r2CLOnxXsP8U"
},
"source": [
"### Giving some input"
]
},
{
"cell_type": "code",
"metadata": {
"id": "rsb1pSFYgXm7"
},
"source": [
"SENTENCES = [\"It was midnight when I\"]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZBfQVBDag1ut",
"outputId": "6a2069b2-d881-485f-e39e-114b036a696e"
},
"source": [
"import random\r\n",
"seed = random.randint(0, 2**32-1)\r\n",
"seed"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"2226218420"
]
},
"metadata": {
"tags": []
},
"execution_count": 52
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-IndXjN1sVxL"
},
"source": [
"### Generating samples"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ufqVA9IXg27_",
"outputId": "8c0f14e4-f775-4e55-9e9f-4d94299e3bfa"
},
"source": [
"examples = []\r\n",
"num_return_sequences = 5\r\n",
"\r\n",
"for start in SENTENCES:\r\n",
" val = !python run_generation.py \\\r\n",
" --model_type gpt2 \\\r\n",
" --model_name_or_path output/$sub \\\r\n",
" --length 160 \\\r\n",
" --num_return_sequences $num_return_sequences \\\r\n",
" --temperature 0.5 \\\r\n",
" --p 0.95 \\\r\n",
" --seed $seed \\\r\n",
" --prompt {'\"<|endoftext|>' + start + '\"'}\r\n",
" generated = [val[-1-2*k] for k in range(num_return_sequences)[::-1]]\r\n",
" print(f'\\nStart of sentence: {start}')\r\n",
" for i, g in enumerate(generated):\r\n",
" g = g.replace('<|endoftext|>', '')\r\n",
" print(f'* Generated #{i+1}: {g}')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"\n",
"Start of sentence: It was midnight when I\n",
"* Generated #1: It was midnight when I arrived home from work. My wife and I had just gotten a new car. We had been spending most of our time at the local park and the time we spent in the park was spent in the park. I’d spent a lot of time there, getting to know the people, the parks, the people who lived in the park. I had been to the park before, but I had never seen anything like it. I was about to leave when I heard a knock on the door. “Come in!” I called out. I was about to jump out when I heard a knock on the door. “Hello?” I asked. “Oh, hello?” “You’re not in\n",
"* Generated #2: It was midnight when I woke up. I was in a hospital bed, but the doctors had not seen me for several days. I had been in a coma for about two weeks. I had been in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I\n",
"* Generated #3: It was midnight when I woke up. I had to go to bed, but I didn’t want to go to sleep. I looked up at the ceiling, hoping I’d wake up. I was still awake, but I felt a little weak. I wanted to go to sleep, but I didn’t know what to do. I tried to scream, but I couldn’t. I tried to wake up, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I\n",
"* Generated #4: It was midnight when I got home from work. I was going to go check on my wife, but she was gone. I was going to go check on my daughter. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter,\n",
"* Generated #5: It was midnight when I woke up. My bedroom was dark, but I was awake. I could hear the faint thumping of a doorbell. I turned to look, but it was gone. I looked again, and there was nothing. The door was open, but I couldn’t see anything. I walked to the door, but it was locked. I got up, and walked to the bed. It was empty, but I could see a figure sitting on the bed. I looked up, and saw that it was the same one I’d seen in the house. I opened the door, and it was gone. I looked around again, and I saw that it was gone too. I walked to the door again, and saw that it was locked. I walked to the door again,\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment