-
-
Save iam-abbas/307ed1ac92cb288859345366fdf15e74 to your computer and use it in GitHub Desktop.
GPT2 Horror.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "GPT2 Horror.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyMIl7puQAbKypz3VA2zgjT8", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"1899861f4ed64cb89a9481f5a8fa4a2e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_e14589f6b0fb48869f2a7f96154173b7", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_48bda53ec37045f085520e23242d6d00", | |
"IPY_MODEL_cae29f75c54640d9b474ece68f56c392" | |
] | |
} | |
}, | |
"e14589f6b0fb48869f2a7f96154173b7": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"48bda53ec37045f085520e23242d6d00": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_0b4b0024a90645b0853c264fe0946867", | |
"_dom_classes": [], | |
"description": "100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 716, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 716, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_e8553d1e66204000aef6076ba534af7d" | |
} | |
}, | |
"cae29f75c54640d9b474ece68f56c392": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_8052912abf614c718fbaab3a275b33b2", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 716/716 [01:20<00:00, 8.89it/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_a63f5904e92143e0ab44a96b7f052622" | |
} | |
}, | |
"0b4b0024a90645b0853c264fe0946867": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"e8553d1e66204000aef6076ba534af7d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"8052912abf614c718fbaab3a275b33b2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"a63f5904e92143e0ab44a96b7f052622": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/iam-abbas/307ed1ac92cb288859345366fdf15e74/gpt2-horror.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "H_5EeUiNrP5S" | |
}, | |
"source": [ | |
"## Generating Horror Stories using GPT-2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BqFIT1YCrViW" | |
}, | |
"source": [ | |
"### Install required libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ey3VCJmNdjM8", | |
"outputId": "a25fbc62-be25-4fd7-c414-80442d879844" | |
}, | |
"source": [ | |
"# install required libraries if they are not already installed\r\n", | |
"!pip install torch -qq\r\n", | |
"!pip install transformers -qq\r\n", | |
"!pip install -q praw\r\n", | |
"!pip install tweepy -qq\r\n", | |
"!pip install datasets" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\u001b[K |████████████████████████████████| 1.5MB 4.0MB/s \n", | |
"\u001b[K |████████████████████████████████| 890kB 53.3MB/s \n", | |
"\u001b[K |████████████████████████████████| 2.9MB 47.8MB/s \n", | |
"\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"\u001b[K |████████████████████████████████| 153kB 5.5MB/s \n", | |
"\u001b[K |████████████████████████████████| 204kB 32.7MB/s \n", | |
"\u001b[?25h" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "jpC6i6ITrZLj" | |
}, | |
"source": [ | |
"### Get the files needed for fine-tuning" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QGFKGMI7dz1z" | |
}, | |
"source": [ | |
"# HuggingFace scripts for fine-tuning models and language generation\r\n", | |
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/text-generation/run_generation.py -q" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zNSwZr1aei7t" | |
}, | |
"source": [ | |
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/language-modeling/run_clm.py -q" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CQ6BDeqzreJw" | |
}, | |
"source": [ | |
"### Importing all the required libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UBN6CCY3eFbT" | |
}, | |
"source": [ | |
"import praw\r\n", | |
"import json\r\n", | |
"import re\r\n", | |
"from tqdm import tqdm_notebook as tqdm\r\n", | |
"import random\r\n", | |
"import torch" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "cHia3CgHriH_" | |
}, | |
"source": [ | |
"### Loading the Reddit Developer API credentials that are stored in a JSON file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Yjk8EiFOd1aw" | |
}, | |
"source": [ | |
"with open('credentials.json') as file:\r\n", | |
" creds = json.loads(file.read())" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "40hhlJoQd5Sb" | |
}, | |
"source": [ | |
"scraper = praw.Reddit(client_id=creds['client_id'], client_secret=creds['client_secret'], user_agent=creds['user_agent'])" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "DqNxWLHBfIDo" | |
}, | |
"source": [ | |
"sub = \"nosleep\"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "SsuHGp_7rogA" | |
}, | |
"source": [ | |
"### Fetching stories from `r/nosleep` into a list" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9SYRaI-4fLXe" | |
}, | |
"source": [ | |
"stories = list(scraper.subreddit(sub).hot(limit=10000))" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "IK9OR4zIrwSP" | |
}, | |
"source": [ | |
"### Doing some cleaning on text using regex" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 117, | |
"referenced_widgets": [ | |
"1899861f4ed64cb89a9481f5a8fa4a2e", | |
"e14589f6b0fb48869f2a7f96154173b7", | |
"48bda53ec37045f085520e23242d6d00", | |
"cae29f75c54640d9b474ece68f56c392", | |
"0b4b0024a90645b0853c264fe0946867", | |
"e8553d1e66204000aef6076ba534af7d", | |
"8052912abf614c718fbaab3a275b33b2", | |
"a63f5904e92143e0ab44a96b7f052622" | |
] | |
}, | |
"id": "r_5eRvNGfRAb", | |
"outputId": "a559df1d-bf7f-4c04-aacd-b658803508cd" | |
}, | |
"source": [ | |
"clean_stories = []\r\n", | |
"for story in tqdm(stories):\r\n", | |
" story = re.sub(\"\\[[^)]*\\)\", \"\", story.selftext)\r\n", | |
" if story:\r\n", | |
" clean_stories.append(story.replace(\"\\n\", \" \"))\r\n" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", | |
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", | |
" \n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "1899861f4ed64cb89a9481f5a8fa4a2e", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=716.0), HTML(value='')))" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "fYtRLBB1r0xI" | |
}, | |
"source": [ | |
"## Preparing the dataset for training and validation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EYMsDluFfUQ1" | |
}, | |
"source": [ | |
"# shuffle data\r\n", | |
"random.shuffle(clean_stories)\r\n", | |
"\r\n", | |
"# fraction of training data\r\n", | |
"split_train_valid = 0.9\r\n", | |
"\r\n", | |
"# split dataset\r\n", | |
"train_size = int(split_train_valid * len(clean_stories))\r\n", | |
"valid_size = len(clean_stories) - train_size\r\n", | |
"train_dataset, valid_dataset = torch.utils.data.random_split(clean_stories, [train_size, valid_size])" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "G-3wQ5jDfi6l" | |
}, | |
"source": [ | |
"def make_dataset(dataset, epochs):\r\n", | |
" total_text = '<|endoftext|>'\r\n", | |
" tweets = [t for t in dataset]\r\n", | |
" for _ in range(epochs):\r\n", | |
" random.shuffle(tweets)\r\n", | |
" total_text += '<|endoftext|>'.join(tweets) + '<|endoftext|>'\r\n", | |
" return total_text" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "NWzbVpxKr8OK" | |
}, | |
"source": [ | |
"### Saving the training and validation data into respective files" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "RUZQW3Oufkye" | |
}, | |
"source": [ | |
"EPOCHS = 4\r\n", | |
"\r\n", | |
"with open('{}_train.txt'.format(sub), 'w') as f:\r\n", | |
" data = make_dataset(train_dataset, EPOCHS)\r\n", | |
" f.write(data)\r\n", | |
"\r\n", | |
"with open('{}_valid.txt'.format(sub), 'w') as f:\r\n", | |
" data = make_dataset(valid_dataset, 1)\r\n", | |
" f.write(data)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Qe9GJYP2sA-W" | |
}, | |
"source": [ | |
"## Using Transformers' Causal Language Modeling fine-tuning script to re-train the model on a custom dataset, saving the weights and checkpoints at output/nosleep" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "S8dpmqodihMt", | |
"outputId": "aa464b0e-bee6-41c4-b56a-bb71cf03439b" | |
}, | |
"source": [ | |
"!python run_clm.py \\\r\n", | |
" --output_dir=output/$sub \\\r\n", | |
" --overwrite_output_dir \\\r\n", | |
" --overwrite_cache \\\r\n", | |
" --model_type=gpt2 \\\r\n", | |
" --model_name_or_path=gpt2 \\\r\n", | |
" --do_train \\\r\n", | |
" --do_eval \\\r\n", | |
" --train_file=$sub\\_train.txt \\\r\n", | |
" --validation_file=$sub\\_valid.txt \\\r\n", | |
" --eval_steps 20 \\\r\n", | |
" --logging_steps 20 \\\r\n", | |
" --per_gpu_train_batch_size 1 \\\r\n", | |
" --num_train_epochs 1" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"2020-12-31 11:28:23.993065: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n", | |
"12/31/2020 11:28:26 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", | |
"12/31/2020 11:28:26 - INFO - __main__ - Training/evaluation parameters TrainingArguments(output_dir='output/nosleep', overwrite_output_dir=True, do_train=True, do_eval=True, do_predict=False, model_parallel=False, evaluation_strategy=<EvaluationStrategy.NO: 'no'>, prediction_loss_only=False, per_device_train_batch_size=8, per_device_eval_batch_size=8, per_gpu_train_batch_size=1, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=5e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, warmup_steps=0, logging_dir='runs/Dec31_11-28-26_9398038d5345', logging_first_step=False, logging_steps=20, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, debug=False, dataloader_drop_last=False, eval_steps=20, dataloader_num_workers=0, past_index=-1, run_name='output/nosleep', disable_tqdm=False, remove_unused_columns=True, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, fp16_backend='auto', sharded_ddp=False)\n", | |
"Downloading: 2.57kB [00:00, 2.52MB/s] \n", | |
"Using custom data configuration default\n", | |
"Downloading and preparing dataset text/default-1e45ef12cc6822eb (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/text/default-1e45ef12cc6822eb/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab...\n", | |
"Dataset text downloaded and prepared to /root/.cache/huggingface/datasets/text/default-1e45ef12cc6822eb/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab. Subsequent calls will reuse this data.\n", | |
"[INFO|configuration_utils.py:431] 2020-12-31 11:28:29,191 >> loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", | |
"[INFO|configuration_utils.py:467] 2020-12-31 11:28:29,192 >> Model config GPT2Config {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPT2LMHeadModel\"\n", | |
" ],\n", | |
" \"attn_pdrop\": 0.1,\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embd_pdrop\": 0.1,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"model_type\": \"gpt2\",\n", | |
" \"n_ctx\": 1024,\n", | |
" \"n_embd\": 768,\n", | |
" \"n_head\": 12,\n", | |
" \"n_inner\": null,\n", | |
" \"n_layer\": 12,\n", | |
" \"n_positions\": 1024,\n", | |
" \"resid_pdrop\": 0.1,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50\n", | |
" }\n", | |
" },\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257\n", | |
"}\n", | |
"\n", | |
"[INFO|configuration_utils.py:431] 2020-12-31 11:28:29,993 >> loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", | |
"[INFO|configuration_utils.py:467] 2020-12-31 11:28:29,994 >> Model config GPT2Config {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPT2LMHeadModel\"\n", | |
" ],\n", | |
" \"attn_pdrop\": 0.1,\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embd_pdrop\": 0.1,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"model_type\": \"gpt2\",\n", | |
" \"n_ctx\": 1024,\n", | |
" \"n_embd\": 768,\n", | |
" \"n_head\": 12,\n", | |
" \"n_inner\": null,\n", | |
" \"n_layer\": 12,\n", | |
" \"n_positions\": 1024,\n", | |
" \"resid_pdrop\": 0.1,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50\n", | |
" }\n", | |
" },\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257\n", | |
"}\n", | |
"\n", | |
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n", | |
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n", | |
"[INFO|tokenization_utils_base.py:1802] 2020-12-31 11:28:31,662 >> loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n", | |
"[INFO|modeling_utils.py:1024] 2020-12-31 11:28:32,288 >> loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", | |
"[INFO|modeling_utils.py:1140] 2020-12-31 11:28:37,614 >> All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", | |
"\n", | |
"[INFO|modeling_utils.py:1149] 2020-12-31 11:28:37,614 >> All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", | |
"If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", | |
"[WARNING|tokenization_utils_base.py:3233] 2020-12-31 11:28:38,161 >> Token indices sequence length is longer than the specified maximum sequence length for this model (175756 > 1024). Running this sequence through the model will result in indexing errors\n", | |
"100% 3/3 [00:19<00:00, 6.38s/ba]\n", | |
"100% 1/1 [00:00<00:00, 1.57ba/s]\n", | |
"100% 3/3 [00:53<00:00, 17.75s/ba]\n", | |
"100% 1/1 [00:00<00:00, 5.50ba/s]\n", | |
"[INFO|trainer.py:388] 2020-12-31 11:30:02,192 >> The following columns in the training set don't have a corresponding argument in `GPT2LMHeadModel.forward` and have been ignored: .\n", | |
"[INFO|trainer.py:388] 2020-12-31 11:30:02,192 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2LMHeadModel.forward` and have been ignored: .\n", | |
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,193 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n", | |
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,196 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n", | |
"[INFO|trainer.py:703] 2020-12-31 11:30:02,196 >> ***** Running training *****\n", | |
"[INFO|trainer.py:704] 2020-12-31 11:30:02,196 >> Num examples = 7360\n", | |
"[INFO|trainer.py:705] 2020-12-31 11:30:02,196 >> Num Epochs = 1\n", | |
"[INFO|trainer.py:706] 2020-12-31 11:30:02,196 >> Instantaneous batch size per device = 8\n", | |
"[INFO|trainer.py:707] 2020-12-31 11:30:02,196 >> Total train batch size (w. parallel, distributed & accumulation) = 1\n", | |
"[INFO|trainer.py:708] 2020-12-31 11:30:02,196 >> Gradient Accumulation steps = 1\n", | |
"[INFO|trainer.py:709] 2020-12-31 11:30:02,196 >> Total optimization steps = 7360\n", | |
"[WARNING|training_args.py:423] 2020-12-31 11:30:02,202 >> Using deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.\n", | |
"{'loss': 3.512045669555664, 'learning_rate': 4.986413043478261e-05, 'epoch': 0.002717391304347826}\n", | |
"{'loss': 3.3283241271972654, 'learning_rate': 4.9728260869565216e-05, 'epoch': 0.005434782608695652}\n", | |
"{'loss': 3.364488983154297, 'learning_rate': 4.959239130434783e-05, 'epoch': 0.008152173913043478}\n", | |
"{'loss': 3.3092582702636717, 'learning_rate': 4.945652173913044e-05, 'epoch': 0.010869565217391304}\n", | |
"{'loss': 3.2144798278808593, 'learning_rate': 4.932065217391305e-05, 'epoch': 0.01358695652173913}\n", | |
"{'loss': 3.2673980712890627, 'learning_rate': 4.918478260869566e-05, 'epoch': 0.016304347826086956}\n", | |
"{'loss': 3.3383720397949217, 'learning_rate': 4.904891304347826e-05, 'epoch': 0.019021739130434784}\n", | |
"{'loss': 3.1150543212890627, 'learning_rate': 4.891304347826087e-05, 'epoch': 0.021739130434782608}\n", | |
"{'loss': 3.223776626586914, 'learning_rate': 4.8777173913043476e-05, 'epoch': 0.024456521739130436}\n", | |
"{'loss': 3.2124401092529298, 'learning_rate': 4.8641304347826086e-05, 'epoch': 0.02717391304347826}\n", | |
"{'loss': 3.115188789367676, 'learning_rate': 4.8505434782608696e-05, 'epoch': 0.029891304347826088}\n", | |
"{'loss': 3.172593116760254, 'learning_rate': 4.836956521739131e-05, 'epoch': 0.03260869565217391}\n", | |
"{'loss': 3.145791435241699, 'learning_rate': 4.823369565217392e-05, 'epoch': 0.035326086956521736}\n", | |
"{'loss': 3.1447931289672852, 'learning_rate': 4.809782608695653e-05, 'epoch': 0.03804347826086957}\n", | |
"{'loss': 3.1974361419677733, 'learning_rate': 4.796195652173913e-05, 'epoch': 0.04076086956521739}\n", | |
"{'loss': 3.1108156204223634, 'learning_rate': 4.782608695652174e-05, 'epoch': 0.043478260869565216}\n", | |
"{'loss': 3.144238090515137, 'learning_rate': 4.7690217391304345e-05, 'epoch': 0.04619565217391304}\n", | |
"{'loss': 3.1227893829345703, 'learning_rate': 4.7554347826086956e-05, 'epoch': 0.04891304347826087}\n", | |
"{'loss': 3.0857648849487305, 'learning_rate': 4.741847826086957e-05, 'epoch': 0.051630434782608696}\n", | |
"{'loss': 3.2329044342041016, 'learning_rate': 4.7282608695652177e-05, 'epoch': 0.05434782608695652}\n", | |
"{'loss': 3.1727392196655275, 'learning_rate': 4.714673913043479e-05, 'epoch': 0.057065217391304345}\n", | |
"{'loss': 3.2452533721923826, 'learning_rate': 4.701086956521739e-05, 'epoch': 0.059782608695652176}\n", | |
"{'loss': 3.2963642120361327, 'learning_rate': 4.6875e-05, 'epoch': 0.0625}\n", | |
"{'loss': 3.2465747833251952, 'learning_rate': 4.673913043478261e-05, 'epoch': 0.06521739130434782}\n", | |
"{'loss': 3.151161003112793, 'learning_rate': 4.660326086956522e-05, 'epoch': 0.06793478260869565}\n", | |
" 7% 500/7360 [01:15<17:13, 6.64it/s][INFO|trainer.py:1226] 2020-12-31 11:31:17,578 >> Saving model checkpoint to output/nosleep/checkpoint-500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:31:17,579 >> Configuration saved in output/nosleep/checkpoint-500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:31:19,343 >> Model weights saved in output/nosleep/checkpoint-500/pytorch_model.bin\n", | |
"{'loss': 3.2976707458496093, 'learning_rate': 4.646739130434783e-05, 'epoch': 0.07065217391304347}\n", | |
"{'loss': 3.0835899353027343, 'learning_rate': 4.6331521739130436e-05, 'epoch': 0.07336956521739131}\n", | |
"{'loss': 3.1966699600219726, 'learning_rate': 4.6195652173913046e-05, 'epoch': 0.07608695652173914}\n", | |
"{'loss': 3.1216732025146485, 'learning_rate': 4.6059782608695657e-05, 'epoch': 0.07880434782608696}\n", | |
"{'loss': 3.1534637451171874, 'learning_rate': 4.592391304347826e-05, 'epoch': 0.08152173913043478}\n", | |
"{'loss': 3.0839128494262695, 'learning_rate': 4.578804347826087e-05, 'epoch': 0.08423913043478261}\n", | |
"{'loss': 3.0870521545410154, 'learning_rate': 4.565217391304348e-05, 'epoch': 0.08695652173913043}\n", | |
"{'loss': 3.0487434387207033, 'learning_rate': 4.551630434782609e-05, 'epoch': 0.08967391304347826}\n", | |
"{'loss': 3.1483850479125977, 'learning_rate': 4.53804347826087e-05, 'epoch': 0.09239130434782608}\n", | |
"{'loss': 3.2189189910888674, 'learning_rate': 4.5244565217391305e-05, 'epoch': 0.09510869565217392}\n", | |
"{'loss': 3.171872138977051, 'learning_rate': 4.5108695652173916e-05, 'epoch': 0.09782608695652174}\n", | |
"{'loss': 3.1267330169677736, 'learning_rate': 4.4972826086956526e-05, 'epoch': 0.10054347826086957}\n", | |
"{'loss': 3.0810239791870115, 'learning_rate': 4.483695652173913e-05, 'epoch': 0.10326086956521739}\n", | |
"{'loss': 3.056863212585449, 'learning_rate': 4.470108695652174e-05, 'epoch': 0.10597826086956522}\n", | |
"{'loss': 3.1936498641967774, 'learning_rate': 4.456521739130435e-05, 'epoch': 0.10869565217391304}\n", | |
"{'loss': 3.0913599014282225, 'learning_rate': 4.442934782608696e-05, 'epoch': 0.11141304347826086}\n", | |
"{'loss': 3.075895309448242, 'learning_rate': 4.429347826086957e-05, 'epoch': 0.11413043478260869}\n", | |
"{'loss': 3.0935127258300783, 'learning_rate': 4.4157608695652175e-05, 'epoch': 0.11684782608695653}\n", | |
"{'loss': 3.05960750579834, 'learning_rate': 4.4021739130434786e-05, 'epoch': 0.11956521739130435}\n", | |
"{'loss': 3.026990509033203, 'learning_rate': 4.388586956521739e-05, 'epoch': 0.12228260869565218}\n", | |
"{'loss': 3.117096710205078, 'learning_rate': 4.375e-05, 'epoch': 0.125}\n", | |
"{'loss': 3.229228973388672, 'learning_rate': 4.361413043478261e-05, 'epoch': 0.12771739130434784}\n", | |
"{'loss': 3.178273391723633, 'learning_rate': 4.347826086956522e-05, 'epoch': 0.13043478260869565}\n", | |
"{'loss': 3.1459720611572264, 'learning_rate': 4.334239130434783e-05, 'epoch': 0.1331521739130435}\n", | |
"{'loss': 2.9881481170654296, 'learning_rate': 4.3206521739130434e-05, 'epoch': 0.1358695652173913}\n", | |
" 14% 1000/7360 [02:36<15:50, 6.69it/s][INFO|trainer.py:1226] 2020-12-31 11:32:38,577 >> Saving model checkpoint to output/nosleep/checkpoint-1000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:32:38,579 >> Configuration saved in output/nosleep/checkpoint-1000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:32:40,184 >> Model weights saved in output/nosleep/checkpoint-1000/pytorch_model.bin\n", | |
"{'loss': 3.059023857116699, 'learning_rate': 4.3070652173913045e-05, 'epoch': 0.13858695652173914}\n", | |
"{'loss': 3.04290657043457, 'learning_rate': 4.2934782608695655e-05, 'epoch': 0.14130434782608695}\n", | |
"{'loss': 2.915846824645996, 'learning_rate': 4.279891304347826e-05, 'epoch': 0.14402173913043478}\n", | |
"{'loss': 3.018391799926758, 'learning_rate': 4.266304347826087e-05, 'epoch': 0.14673913043478262}\n", | |
"{'loss': 3.1145843505859374, 'learning_rate': 4.252717391304348e-05, 'epoch': 0.14945652173913043}\n", | |
"{'loss': 3.124193000793457, 'learning_rate': 4.239130434782609e-05, 'epoch': 0.15217391304347827}\n", | |
"{'loss': 3.1068984985351564, 'learning_rate': 4.22554347826087e-05, 'epoch': 0.15489130434782608}\n", | |
"{'loss': 3.100969696044922, 'learning_rate': 4.2119565217391304e-05, 'epoch': 0.15760869565217392}\n", | |
"{'loss': 3.0650775909423826, 'learning_rate': 4.1983695652173914e-05, 'epoch': 0.16032608695652173}\n", | |
"{'loss': 3.067531204223633, 'learning_rate': 4.1847826086956525e-05, 'epoch': 0.16304347826086957}\n", | |
"{'loss': 3.1300127029418947, 'learning_rate': 4.171195652173913e-05, 'epoch': 0.16576086956521738}\n", | |
"{'loss': 3.0881221771240233, 'learning_rate': 4.1576086956521746e-05, 'epoch': 0.16847826086956522}\n", | |
"{'loss': 3.1220754623413085, 'learning_rate': 4.144021739130435e-05, 'epoch': 0.17119565217391305}\n", | |
"{'loss': 3.222932815551758, 'learning_rate': 4.130434782608696e-05, 'epoch': 0.17391304347826086}\n", | |
"{'loss': 3.0593658447265626, 'learning_rate': 4.116847826086957e-05, 'epoch': 0.1766304347826087}\n", | |
"{'loss': 3.018812561035156, 'learning_rate': 4.1032608695652174e-05, 'epoch': 0.1793478260869565}\n", | |
"{'loss': 3.0209110260009764, 'learning_rate': 4.0896739130434784e-05, 'epoch': 0.18206521739130435}\n", | |
"{'loss': 3.059507369995117, 'learning_rate': 4.076086956521739e-05, 'epoch': 0.18478260869565216}\n", | |
"{'loss': 3.0747259140014647, 'learning_rate': 4.0625000000000005e-05, 'epoch': 0.1875}\n", | |
"{'loss': 3.0447511672973633, 'learning_rate': 4.0489130434782615e-05, 'epoch': 0.19021739130434784}\n", | |
"{'loss': 3.0659013748168946, 'learning_rate': 4.035326086956522e-05, 'epoch': 0.19293478260869565}\n", | |
"{'loss': 2.887067413330078, 'learning_rate': 4.021739130434783e-05, 'epoch': 0.1956521739130435}\n", | |
"{'loss': 3.0112159729003904, 'learning_rate': 4.008152173913043e-05, 'epoch': 0.1983695652173913}\n", | |
"{'loss': 3.152726936340332, 'learning_rate': 3.994565217391304e-05, 'epoch': 0.20108695652173914}\n", | |
"{'loss': 3.0397472381591797, 'learning_rate': 3.9809782608695654e-05, 'epoch': 0.20380434782608695}\n", | |
" 20% 1500/7360 [03:58<14:44, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:34:00,695 >> Saving model checkpoint to output/nosleep/checkpoint-1500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:34:00,697 >> Configuration saved in output/nosleep/checkpoint-1500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:34:02,420 >> Model weights saved in output/nosleep/checkpoint-1500/pytorch_model.bin\n", | |
"{'loss': 2.997607612609863, 'learning_rate': 3.9673913043478264e-05, 'epoch': 0.20652173913043478}\n", | |
"{'loss': 3.096424865722656, 'learning_rate': 3.9538043478260875e-05, 'epoch': 0.20923913043478262}\n", | |
"{'loss': 3.0459573745727537, 'learning_rate': 3.940217391304348e-05, 'epoch': 0.21195652173913043}\n", | |
"{'loss': 3.046352577209473, 'learning_rate': 3.926630434782609e-05, 'epoch': 0.21467391304347827}\n", | |
"{'loss': 3.054677391052246, 'learning_rate': 3.91304347826087e-05, 'epoch': 0.21739130434782608}\n", | |
"{'loss': 2.96755428314209, 'learning_rate': 3.89945652173913e-05, 'epoch': 0.22010869565217392}\n", | |
"{'loss': 3.183602714538574, 'learning_rate': 3.885869565217391e-05, 'epoch': 0.22282608695652173}\n", | |
"{'loss': 3.0821224212646485, 'learning_rate': 3.8722826086956523e-05, 'epoch': 0.22554347826086957}\n", | |
"{'loss': 3.0650859832763673, 'learning_rate': 3.8586956521739134e-05, 'epoch': 0.22826086956521738}\n", | |
"{'loss': 3.0474729537963867, 'learning_rate': 3.8451086956521744e-05, 'epoch': 0.23097826086956522}\n", | |
"{'loss': 2.991226387023926, 'learning_rate': 3.831521739130435e-05, 'epoch': 0.23369565217391305}\n", | |
"{'loss': 3.0636838912963866, 'learning_rate': 3.817934782608696e-05, 'epoch': 0.23641304347826086}\n", | |
"{'loss': 3.107697296142578, 'learning_rate': 3.804347826086957e-05, 'epoch': 0.2391304347826087}\n", | |
"{'loss': 3.0795093536376954, 'learning_rate': 3.790760869565217e-05, 'epoch': 0.2418478260869565}\n", | |
"{'loss': 3.0636316299438477, 'learning_rate': 3.777173913043478e-05, 'epoch': 0.24456521739130435}\n", | |
"{'loss': 3.1231103897094727, 'learning_rate': 3.763586956521739e-05, 'epoch': 0.24728260869565216}\n", | |
"{'loss': 3.0773599624633787, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.25}\n", | |
"{'loss': 3.0005971908569338, 'learning_rate': 3.7364130434782614e-05, 'epoch': 0.25271739130434784}\n", | |
"{'loss': 3.0077032089233398, 'learning_rate': 3.722826086956522e-05, 'epoch': 0.2554347826086957}\n", | |
"{'loss': 3.0258026123046875, 'learning_rate': 3.709239130434783e-05, 'epoch': 0.25815217391304346}\n", | |
"{'loss': 3.10824031829834, 'learning_rate': 3.695652173913043e-05, 'epoch': 0.2608695652173913}\n", | |
"{'loss': 3.023676872253418, 'learning_rate': 3.682065217391304e-05, 'epoch': 0.26358695652173914}\n", | |
"{'loss': 2.993247222900391, 'learning_rate': 3.668478260869566e-05, 'epoch': 0.266304347826087}\n", | |
"{'loss': 3.052256774902344, 'learning_rate': 3.654891304347826e-05, 'epoch': 0.26902173913043476}\n", | |
"{'loss': 3.0760807037353515, 'learning_rate': 3.641304347826087e-05, 'epoch': 0.2717391304347826}\n", | |
" 27% 2000/7360 [05:20<13:46, 6.49it/s][INFO|trainer.py:1226] 2020-12-31 11:35:22,941 >> Saving model checkpoint to output/nosleep/checkpoint-2000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:35:22,943 >> Configuration saved in output/nosleep/checkpoint-2000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:35:24,589 >> Model weights saved in output/nosleep/checkpoint-2000/pytorch_model.bin\n", | |
"{'loss': 3.013957214355469, 'learning_rate': 3.627717391304348e-05, 'epoch': 0.27445652173913043}\n", | |
"{'loss': 3.1673465728759767, 'learning_rate': 3.614130434782609e-05, 'epoch': 0.27717391304347827}\n", | |
"{'loss': 3.0540140151977537, 'learning_rate': 3.60054347826087e-05, 'epoch': 0.2798913043478261}\n", | |
"{'loss': 3.0425041198730467, 'learning_rate': 3.58695652173913e-05, 'epoch': 0.2826086956521739}\n", | |
"{'loss': 3.078123664855957, 'learning_rate': 3.573369565217392e-05, 'epoch': 0.28532608695652173}\n", | |
"{'loss': 3.130359649658203, 'learning_rate': 3.559782608695653e-05, 'epoch': 0.28804347826086957}\n", | |
"{'loss': 3.161348342895508, 'learning_rate': 3.546195652173913e-05, 'epoch': 0.2907608695652174}\n", | |
"{'loss': 2.9576461791992186, 'learning_rate': 3.532608695652174e-05, 'epoch': 0.29347826086956524}\n", | |
"{'loss': 3.111285400390625, 'learning_rate': 3.5190217391304346e-05, 'epoch': 0.296195652173913}\n", | |
"{'loss': 3.0396650314331053, 'learning_rate': 3.505434782608696e-05, 'epoch': 0.29891304347826086}\n", | |
"{'loss': 3.071572685241699, 'learning_rate': 3.491847826086957e-05, 'epoch': 0.3016304347826087}\n", | |
"{'loss': 3.0055158615112303, 'learning_rate': 3.478260869565218e-05, 'epoch': 0.30434782608695654}\n", | |
"{'loss': 2.9725093841552734, 'learning_rate': 3.464673913043479e-05, 'epoch': 0.3070652173913043}\n", | |
"{'loss': 2.9921489715576173, 'learning_rate': 3.451086956521739e-05, 'epoch': 0.30978260869565216}\n", | |
"{'loss': 3.0546697616577148, 'learning_rate': 3.4375e-05, 'epoch': 0.3125}\n", | |
"{'loss': 2.9978811264038088, 'learning_rate': 3.423913043478261e-05, 'epoch': 0.31521739130434784}\n", | |
"{'loss': 3.1139686584472654, 'learning_rate': 3.4103260869565216e-05, 'epoch': 0.3179347826086957}\n", | |
"{'loss': 3.1690120697021484, 'learning_rate': 3.3967391304347826e-05, 'epoch': 0.32065217391304346}\n", | |
"{'loss': 2.947348213195801, 'learning_rate': 3.383152173913044e-05, 'epoch': 0.3233695652173913}\n", | |
"{'loss': 3.0949867248535154, 'learning_rate': 3.369565217391305e-05, 'epoch': 0.32608695652173914}\n", | |
"{'loss': 2.9547935485839845, 'learning_rate': 3.355978260869566e-05, 'epoch': 0.328804347826087}\n", | |
"{'loss': 2.8996337890625, 'learning_rate': 3.342391304347826e-05, 'epoch': 0.33152173913043476}\n", | |
"{'loss': 2.9912178039550783, 'learning_rate': 3.328804347826087e-05, 'epoch': 0.3342391304347826}\n", | |
"{'loss': 3.1216081619262694, 'learning_rate': 3.3152173913043475e-05, 'epoch': 0.33695652173913043}\n", | |
"{'loss': 2.9457778930664062, 'learning_rate': 3.3016304347826086e-05, 'epoch': 0.33967391304347827}\n", | |
" 34% 2500/7360 [06:41<12:14, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:36:44,059 >> Saving model checkpoint to output/nosleep/checkpoint-2500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:36:44,060 >> Configuration saved in output/nosleep/checkpoint-2500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:36:45,639 >> Model weights saved in output/nosleep/checkpoint-2500/pytorch_model.bin\n", | |
"{'loss': 3.0662458419799803, 'learning_rate': 3.2880434782608696e-05, 'epoch': 0.3423913043478261}\n", | |
"{'loss': 2.989406967163086, 'learning_rate': 3.2744565217391307e-05, 'epoch': 0.3451086956521739}\n", | |
"{'loss': 3.053440475463867, 'learning_rate': 3.260869565217392e-05, 'epoch': 0.34782608695652173}\n", | |
"{'loss': 2.9125459671020506, 'learning_rate': 3.247282608695653e-05, 'epoch': 0.35054347826086957}\n", | |
"{'loss': 3.0044492721557616, 'learning_rate': 3.233695652173913e-05, 'epoch': 0.3532608695652174}\n", | |
"{'loss': 2.9832174301147463, 'learning_rate': 3.220108695652174e-05, 'epoch': 0.35597826086956524}\n", | |
"{'loss': 3.0895017623901366, 'learning_rate': 3.2065217391304345e-05, 'epoch': 0.358695652173913}\n", | |
"{'loss': 3.051135444641113, 'learning_rate': 3.1929347826086955e-05, 'epoch': 0.36141304347826086}\n", | |
"{'loss': 3.0140249252319338, 'learning_rate': 3.1793478260869566e-05, 'epoch': 0.3641304347826087}\n", | |
"{'loss': 2.9950437545776367, 'learning_rate': 3.1657608695652176e-05, 'epoch': 0.36684782608695654}\n", | |
"{'loss': 3.102407455444336, 'learning_rate': 3.152173913043479e-05, 'epoch': 0.3695652173913043}\n", | |
"{'loss': 3.0337915420532227, 'learning_rate': 3.138586956521739e-05, 'epoch': 0.37228260869565216}\n", | |
"{'loss': 3.0124744415283202, 'learning_rate': 3.125e-05, 'epoch': 0.375}\n", | |
"{'loss': 2.9697227478027344, 'learning_rate': 3.111413043478261e-05, 'epoch': 0.37771739130434784}\n", | |
"{'loss': 2.954401397705078, 'learning_rate': 3.0978260869565215e-05, 'epoch': 0.3804347826086957}\n", | |
"{'loss': 3.0465015411376952, 'learning_rate': 3.084239130434783e-05, 'epoch': 0.38315217391304346}\n", | |
"{'loss': 2.9104570388793944, 'learning_rate': 3.0706521739130435e-05, 'epoch': 0.3858695652173913}\n", | |
"{'loss': 2.96370792388916, 'learning_rate': 3.0570652173913046e-05, 'epoch': 0.38858695652173914}\n", | |
"{'loss': 3.0235336303710936, 'learning_rate': 3.0434782608695656e-05, 'epoch': 0.391304347826087}\n", | |
"{'loss': 2.9881025314331056, 'learning_rate': 3.029891304347826e-05, 'epoch': 0.39402173913043476}\n", | |
"{'loss': 2.9734582901000977, 'learning_rate': 3.016304347826087e-05, 'epoch': 0.3967391304347826}\n", | |
"{'loss': 3.036972427368164, 'learning_rate': 3.0027173913043477e-05, 'epoch': 0.39945652173913043}\n", | |
"{'loss': 3.0409944534301756, 'learning_rate': 2.9891304347826088e-05, 'epoch': 0.40217391304347827}\n", | |
"{'loss': 2.9913082122802734, 'learning_rate': 2.9755434782608698e-05, 'epoch': 0.4048913043478261}\n", | |
"{'loss': 2.992050552368164, 'learning_rate': 2.9619565217391305e-05, 'epoch': 0.4076086956521739}\n", | |
" 41% 3000/7360 [08:02<10:53, 6.67it/s][INFO|trainer.py:1226] 2020-12-31 11:38:05,098 >> Saving model checkpoint to output/nosleep/checkpoint-3000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:38:05,100 >> Configuration saved in output/nosleep/checkpoint-3000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:38:06,790 >> Model weights saved in output/nosleep/checkpoint-3000/pytorch_model.bin\n", | |
"{'loss': 3.023343086242676, 'learning_rate': 2.9483695652173916e-05, 'epoch': 0.41032608695652173}\n", | |
"{'loss': 2.97615966796875, 'learning_rate': 2.9347826086956526e-05, 'epoch': 0.41304347826086957}\n", | |
"{'loss': 3.0653865814208983, 'learning_rate': 2.921195652173913e-05, 'epoch': 0.4157608695652174}\n", | |
"{'loss': 3.013285255432129, 'learning_rate': 2.9076086956521743e-05, 'epoch': 0.41847826086956524}\n", | |
"{'loss': 3.074570083618164, 'learning_rate': 2.8940217391304347e-05, 'epoch': 0.421195652173913}\n", | |
"{'loss': 2.9360151290893555, 'learning_rate': 2.8804347826086957e-05, 'epoch': 0.42391304347826086}\n", | |
"{'loss': 3.05234375, 'learning_rate': 2.8668478260869568e-05, 'epoch': 0.4266304347826087}\n", | |
"{'loss': 2.893931579589844, 'learning_rate': 2.8532608695652175e-05, 'epoch': 0.42934782608695654}\n", | |
"{'loss': 2.9600061416625976, 'learning_rate': 2.8396739130434785e-05, 'epoch': 0.4320652173913043}\n", | |
"{'loss': 2.9860197067260743, 'learning_rate': 2.826086956521739e-05, 'epoch': 0.43478260869565216}\n", | |
"{'loss': 3.013777160644531, 'learning_rate': 2.8125000000000003e-05, 'epoch': 0.4375}\n", | |
"{'loss': 2.9207672119140624, 'learning_rate': 2.7989130434782613e-05, 'epoch': 0.44021739130434784}\n", | |
"{'loss': 2.9867345809936525, 'learning_rate': 2.7853260869565217e-05, 'epoch': 0.4429347826086957}\n", | |
"{'loss': 2.9700445175170898, 'learning_rate': 2.7717391304347827e-05, 'epoch': 0.44565217391304346}\n", | |
"{'loss': 3.021417999267578, 'learning_rate': 2.7581521739130434e-05, 'epoch': 0.4483695652173913}\n", | |
"{'loss': 3.0516029357910157, 'learning_rate': 2.7445652173913044e-05, 'epoch': 0.45108695652173914}\n", | |
"{'loss': 2.995887565612793, 'learning_rate': 2.7309782608695655e-05, 'epoch': 0.453804347826087}\n", | |
"{'loss': 2.996470260620117, 'learning_rate': 2.7173913043478262e-05, 'epoch': 0.45652173913043476}\n", | |
"{'loss': 2.8460014343261717, 'learning_rate': 2.7038043478260872e-05, 'epoch': 0.4592391304347826}\n", | |
"{'loss': 3.0168664932250975, 'learning_rate': 2.6902173913043476e-05, 'epoch': 0.46195652173913043}\n", | |
"{'loss': 2.973033905029297, 'learning_rate': 2.6766304347826086e-05, 'epoch': 0.46467391304347827}\n", | |
"{'loss': 2.910268211364746, 'learning_rate': 2.66304347826087e-05, 'epoch': 0.4673913043478261}\n", | |
"{'loss': 2.9323522567749025, 'learning_rate': 2.6494565217391304e-05, 'epoch': 0.4701086956521739}\n", | |
"{'loss': 3.0819915771484374, 'learning_rate': 2.6358695652173914e-05, 'epoch': 0.47282608695652173}\n", | |
"{'loss': 3.013107681274414, 'learning_rate': 2.6222826086956525e-05, 'epoch': 0.47554347826086957}\n", | |
" 48% 3500/7360 [09:24<09:46, 6.59it/s][INFO|trainer.py:1226] 2020-12-31 11:39:26,361 >> Saving model checkpoint to output/nosleep/checkpoint-3500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:39:26,362 >> Configuration saved in output/nosleep/checkpoint-3500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:39:28,029 >> Model weights saved in output/nosleep/checkpoint-3500/pytorch_model.bin\n", | |
"{'loss': 2.909230422973633, 'learning_rate': 2.608695652173913e-05, 'epoch': 0.4782608695652174}\n", | |
"{'loss': 3.0446781158447265, 'learning_rate': 2.5951086956521742e-05, 'epoch': 0.48097826086956524}\n", | |
"{'loss': 3.002200126647949, 'learning_rate': 2.5815217391304346e-05, 'epoch': 0.483695652173913}\n", | |
"{'loss': 3.04788875579834, 'learning_rate': 2.567934782608696e-05, 'epoch': 0.48641304347826086}\n", | |
"{'loss': 2.780653381347656, 'learning_rate': 2.554347826086957e-05, 'epoch': 0.4891304347826087}\n", | |
"{'loss': 2.972218704223633, 'learning_rate': 2.5407608695652173e-05, 'epoch': 0.49184782608695654}\n", | |
"{'loss': 3.0140708923339843, 'learning_rate': 2.5271739130434784e-05, 'epoch': 0.4945652173913043}\n", | |
"{'loss': 2.8474166870117186, 'learning_rate': 2.513586956521739e-05, 'epoch': 0.49728260869565216}\n", | |
"{'loss': 3.0726463317871096, 'learning_rate': 2.5e-05, 'epoch': 0.5}\n", | |
"{'loss': 3.059174346923828, 'learning_rate': 2.4864130434782608e-05, 'epoch': 0.5027173913043478}\n", | |
"{'loss': 2.915945816040039, 'learning_rate': 2.472826086956522e-05, 'epoch': 0.5054347826086957}\n", | |
"{'loss': 2.938085746765137, 'learning_rate': 2.459239130434783e-05, 'epoch': 0.5081521739130435}\n", | |
"{'loss': 2.9889217376708985, 'learning_rate': 2.4456521739130436e-05, 'epoch': 0.5108695652173914}\n", | |
"{'loss': 3.0353281021118166, 'learning_rate': 2.4320652173913043e-05, 'epoch': 0.5135869565217391}\n", | |
"{'loss': 3.0933725357055666, 'learning_rate': 2.4184782608695653e-05, 'epoch': 0.5163043478260869}\n", | |
"{'loss': 2.8329307556152346, 'learning_rate': 2.4048913043478264e-05, 'epoch': 0.5190217391304348}\n", | |
"{'loss': 2.938851165771484, 'learning_rate': 2.391304347826087e-05, 'epoch': 0.5217391304347826}\n", | |
"{'loss': 3.106460762023926, 'learning_rate': 2.3777173913043478e-05, 'epoch': 0.5244565217391305}\n", | |
"{'loss': 2.889130401611328, 'learning_rate': 2.3641304347826088e-05, 'epoch': 0.5271739130434783}\n", | |
"{'loss': 2.952214241027832, 'learning_rate': 2.3505434782608695e-05, 'epoch': 0.529891304347826}\n", | |
"{'loss': 3.0268835067749023, 'learning_rate': 2.3369565217391306e-05, 'epoch': 0.532608695652174}\n", | |
"{'loss': 2.8543680191040037, 'learning_rate': 2.3233695652173916e-05, 'epoch': 0.5353260869565217}\n", | |
"{'loss': 2.91422176361084, 'learning_rate': 2.3097826086956523e-05, 'epoch': 0.5380434782608695}\n", | |
"{'loss': 2.9302282333374023, 'learning_rate': 2.296195652173913e-05, 'epoch': 0.5407608695652174}\n", | |
"{'loss': 2.9321842193603516, 'learning_rate': 2.282608695652174e-05, 'epoch': 0.5434782608695652}\n", | |
" 54% 4000/7360 [10:45<08:21, 6.70it/s][INFO|trainer.py:1226] 2020-12-31 11:40:47,338 >> Saving model checkpoint to output/nosleep/checkpoint-4000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:40:47,340 >> Configuration saved in output/nosleep/checkpoint-4000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:40:48,945 >> Model weights saved in output/nosleep/checkpoint-4000/pytorch_model.bin\n", | |
"{'loss': 2.8125045776367186, 'learning_rate': 2.269021739130435e-05, 'epoch': 0.5461956521739131}\n", | |
"{'loss': 2.896123695373535, 'learning_rate': 2.2554347826086958e-05, 'epoch': 0.5489130434782609}\n", | |
"{'loss': 2.864361381530762, 'learning_rate': 2.2418478260869565e-05, 'epoch': 0.5516304347826086}\n", | |
"{'loss': 2.89820556640625, 'learning_rate': 2.2282608695652175e-05, 'epoch': 0.5543478260869565}\n", | |
"{'loss': 2.921078109741211, 'learning_rate': 2.2146739130434786e-05, 'epoch': 0.5570652173913043}\n", | |
"{'loss': 2.879536247253418, 'learning_rate': 2.2010869565217393e-05, 'epoch': 0.5597826086956522}\n", | |
"{'loss': 3.100284194946289, 'learning_rate': 2.1875e-05, 'epoch': 0.5625}\n", | |
"{'loss': 2.955865669250488, 'learning_rate': 2.173913043478261e-05, 'epoch': 0.5652173913043478}\n", | |
"{'loss': 2.966480827331543, 'learning_rate': 2.1603260869565217e-05, 'epoch': 0.5679347826086957}\n", | |
"{'loss': 2.9319448471069336, 'learning_rate': 2.1467391304347828e-05, 'epoch': 0.5706521739130435}\n", | |
"{'loss': 2.9581552505493165, 'learning_rate': 2.1331521739130435e-05, 'epoch': 0.5733695652173914}\n", | |
"{'loss': 2.8463247299194334, 'learning_rate': 2.1195652173913045e-05, 'epoch': 0.5760869565217391}\n", | |
"{'loss': 3.006674575805664, 'learning_rate': 2.1059782608695652e-05, 'epoch': 0.5788043478260869}\n", | |
"{'loss': 2.8334024429321287, 'learning_rate': 2.0923913043478262e-05, 'epoch': 0.5815217391304348}\n", | |
"{'loss': 2.8948385238647463, 'learning_rate': 2.0788043478260873e-05, 'epoch': 0.5842391304347826}\n", | |
"{'loss': 2.8607654571533203, 'learning_rate': 2.065217391304348e-05, 'epoch': 0.5869565217391305}\n", | |
"{'loss': 2.9063913345336916, 'learning_rate': 2.0516304347826087e-05, 'epoch': 0.5896739130434783}\n", | |
"{'loss': 2.932078170776367, 'learning_rate': 2.0380434782608694e-05, 'epoch': 0.592391304347826}\n", | |
"{'loss': 2.856118392944336, 'learning_rate': 2.0244565217391308e-05, 'epoch': 0.595108695652174}\n", | |
"{'loss': 2.9656396865844727, 'learning_rate': 2.0108695652173915e-05, 'epoch': 0.5978260869565217}\n", | |
"{'loss': 2.890290451049805, 'learning_rate': 1.997282608695652e-05, 'epoch': 0.6005434782608695}\n", | |
"{'loss': 3.000225639343262, 'learning_rate': 1.9836956521739132e-05, 'epoch': 0.6032608695652174}\n", | |
"{'loss': 3.014234733581543, 'learning_rate': 1.970108695652174e-05, 'epoch': 0.6059782608695652}\n", | |
"{'loss': 2.9607624053955077, 'learning_rate': 1.956521739130435e-05, 'epoch': 0.6086956521739131}\n", | |
"{'loss': 2.9253002166748048, 'learning_rate': 1.9429347826086957e-05, 'epoch': 0.6114130434782609}\n", | |
" 61% 4500/7360 [12:06<07:13, 6.59it/s][INFO|trainer.py:1226] 2020-12-31 11:42:08,834 >> Saving model checkpoint to output/nosleep/checkpoint-4500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:42:08,835 >> Configuration saved in output/nosleep/checkpoint-4500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:42:10,483 >> Model weights saved in output/nosleep/checkpoint-4500/pytorch_model.bin\n", | |
"{'loss': 2.8905784606933596, 'learning_rate': 1.9293478260869567e-05, 'epoch': 0.6141304347826086}\n", | |
"{'loss': 2.9393293380737306, 'learning_rate': 1.9157608695652174e-05, 'epoch': 0.6168478260869565}\n", | |
"{'loss': 2.853158378601074, 'learning_rate': 1.9021739130434784e-05, 'epoch': 0.6195652173913043}\n", | |
"{'loss': 2.912156677246094, 'learning_rate': 1.888586956521739e-05, 'epoch': 0.6222826086956522}\n", | |
"{'loss': 2.991878318786621, 'learning_rate': 1.8750000000000002e-05, 'epoch': 0.625}\n", | |
"{'loss': 2.8975969314575196, 'learning_rate': 1.861413043478261e-05, 'epoch': 0.6277173913043478}\n", | |
"{'loss': 2.926823616027832, 'learning_rate': 1.8478260869565216e-05, 'epoch': 0.6304347826086957}\n", | |
"{'loss': 2.9280727386474608, 'learning_rate': 1.834239130434783e-05, 'epoch': 0.6331521739130435}\n", | |
"{'loss': 2.8288547515869142, 'learning_rate': 1.8206521739130437e-05, 'epoch': 0.6358695652173914}\n", | |
"{'loss': 2.947658348083496, 'learning_rate': 1.8070652173913044e-05, 'epoch': 0.6385869565217391}\n", | |
"{'loss': 2.917711639404297, 'learning_rate': 1.793478260869565e-05, 'epoch': 0.6413043478260869}\n", | |
"{'loss': 3.0101985931396484, 'learning_rate': 1.7798913043478264e-05, 'epoch': 0.6440217391304348}\n", | |
"{'loss': 2.986308288574219, 'learning_rate': 1.766304347826087e-05, 'epoch': 0.6467391304347826}\n", | |
"{'loss': 2.9199193954467773, 'learning_rate': 1.752717391304348e-05, 'epoch': 0.6494565217391305}\n", | |
"{'loss': 2.920822525024414, 'learning_rate': 1.739130434782609e-05, 'epoch': 0.6521739130434783}\n", | |
"{'loss': 2.9012218475341798, 'learning_rate': 1.7255434782608696e-05, 'epoch': 0.654891304347826}\n", | |
"{'loss': 2.9841341018676757, 'learning_rate': 1.7119565217391306e-05, 'epoch': 0.657608695652174}\n", | |
"{'loss': 2.8883106231689455, 'learning_rate': 1.6983695652173913e-05, 'epoch': 0.6603260869565217}\n", | |
"{'loss': 2.9954706192016602, 'learning_rate': 1.6847826086956524e-05, 'epoch': 0.6630434782608695}\n", | |
"{'loss': 3.049188995361328, 'learning_rate': 1.671195652173913e-05, 'epoch': 0.6657608695652174}\n", | |
"{'loss': 2.8762126922607423, 'learning_rate': 1.6576086956521738e-05, 'epoch': 0.6684782608695652}\n", | |
"{'loss': 2.901471710205078, 'learning_rate': 1.6440217391304348e-05, 'epoch': 0.6711956521739131}\n", | |
"{'loss': 2.7890932083129885, 'learning_rate': 1.630434782608696e-05, 'epoch': 0.6739130434782609}\n", | |
"{'loss': 2.9092123031616213, 'learning_rate': 1.6168478260869565e-05, 'epoch': 0.6766304347826086}\n", | |
"{'loss': 2.905811309814453, 'learning_rate': 1.6032608695652173e-05, 'epoch': 0.6793478260869565}\n", | |
" 68% 5000/7360 [13:27<05:53, 6.68it/s][INFO|trainer.py:1226] 2020-12-31 11:43:30,013 >> Saving model checkpoint to output/nosleep/checkpoint-5000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:43:30,014 >> Configuration saved in output/nosleep/checkpoint-5000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:43:31,742 >> Model weights saved in output/nosleep/checkpoint-5000/pytorch_model.bin\n", | |
"{'loss': 3.02506046295166, 'learning_rate': 1.5896739130434783e-05, 'epoch': 0.6820652173913043}\n", | |
"{'loss': 2.952139663696289, 'learning_rate': 1.5760869565217393e-05, 'epoch': 0.6847826086956522}\n", | |
"{'loss': 2.9070606231689453, 'learning_rate': 1.5625e-05, 'epoch': 0.6875}\n", | |
"{'loss': 2.934033966064453, 'learning_rate': 1.5489130434782607e-05, 'epoch': 0.6902173913043478}\n", | |
"{'loss': 2.973927688598633, 'learning_rate': 1.5353260869565218e-05, 'epoch': 0.6929347826086957}\n", | |
"{'loss': 2.9208562850952147, 'learning_rate': 1.5217391304347828e-05, 'epoch': 0.6956521739130435}\n", | |
"{'loss': 2.8716516494750977, 'learning_rate': 1.5081521739130435e-05, 'epoch': 0.6983695652173914}\n", | |
"{'loss': 3.008540916442871, 'learning_rate': 1.4945652173913044e-05, 'epoch': 0.7010869565217391}\n", | |
"{'loss': 2.9472280502319337, 'learning_rate': 1.4809782608695653e-05, 'epoch': 0.7038043478260869}\n", | |
"{'loss': 2.980273628234863, 'learning_rate': 1.4673913043478263e-05, 'epoch': 0.7065217391304348}\n", | |
"{'loss': 2.887953758239746, 'learning_rate': 1.4538043478260872e-05, 'epoch': 0.7092391304347826}\n", | |
"{'loss': 2.964910125732422, 'learning_rate': 1.4402173913043479e-05, 'epoch': 0.7119565217391305}\n", | |
"{'loss': 2.907693290710449, 'learning_rate': 1.4266304347826087e-05, 'epoch': 0.7146739130434783}\n", | |
"{'loss': 2.995789337158203, 'learning_rate': 1.4130434782608694e-05, 'epoch': 0.717391304347826}\n", | |
"{'loss': 2.919285202026367, 'learning_rate': 1.3994565217391307e-05, 'epoch': 0.720108695652174}\n", | |
"{'loss': 2.891135025024414, 'learning_rate': 1.3858695652173914e-05, 'epoch': 0.7228260869565217}\n", | |
"{'loss': 2.890410041809082, 'learning_rate': 1.3722826086956522e-05, 'epoch': 0.7255434782608695}\n", | |
"{'loss': 2.884817695617676, 'learning_rate': 1.3586956521739131e-05, 'epoch': 0.7282608695652174}\n", | |
"{'loss': 2.9657230377197266, 'learning_rate': 1.3451086956521738e-05, 'epoch': 0.7309782608695652}\n", | |
"{'loss': 2.885117530822754, 'learning_rate': 1.331521739130435e-05, 'epoch': 0.7336956521739131}\n", | |
"{'loss': 2.961221694946289, 'learning_rate': 1.3179347826086957e-05, 'epoch': 0.7364130434782609}\n", | |
"{'loss': 2.890369415283203, 'learning_rate': 1.3043478260869566e-05, 'epoch': 0.7391304347826086}\n", | |
"{'loss': 2.9881954193115234, 'learning_rate': 1.2907608695652173e-05, 'epoch': 0.7418478260869565}\n", | |
"{'loss': 2.870479774475098, 'learning_rate': 1.2771739130434785e-05, 'epoch': 0.7445652173913043}\n", | |
"{'loss': 2.900164794921875, 'learning_rate': 1.2635869565217392e-05, 'epoch': 0.7472826086956522}\n", | |
" 75% 5500/7360 [14:48<04:37, 6.71it/s][INFO|trainer.py:1226] 2020-12-31 11:44:51,058 >> Saving model checkpoint to output/nosleep/checkpoint-5500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:44:51,059 >> Configuration saved in output/nosleep/checkpoint-5500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:44:52,709 >> Model weights saved in output/nosleep/checkpoint-5500/pytorch_model.bin\n", | |
"{'loss': 2.874920654296875, 'learning_rate': 1.25e-05, 'epoch': 0.75}\n", | |
"{'loss': 2.9421880722045897, 'learning_rate': 1.236413043478261e-05, 'epoch': 0.7527173913043478}\n", | |
"{'loss': 2.9182043075561523, 'learning_rate': 1.2228260869565218e-05, 'epoch': 0.7554347826086957}\n", | |
"{'loss': 2.853078842163086, 'learning_rate': 1.2092391304347827e-05, 'epoch': 0.7581521739130435}\n", | |
"{'loss': 2.754827880859375, 'learning_rate': 1.1956521739130435e-05, 'epoch': 0.7608695652173914}\n", | |
"{'loss': 3.0058160781860352, 'learning_rate': 1.1820652173913044e-05, 'epoch': 0.7635869565217391}\n", | |
"{'loss': 2.938898468017578, 'learning_rate': 1.1684782608695653e-05, 'epoch': 0.7663043478260869}\n", | |
"{'loss': 2.9451087951660155, 'learning_rate': 1.1548913043478262e-05, 'epoch': 0.7690217391304348}\n", | |
"{'loss': 2.832347869873047, 'learning_rate': 1.141304347826087e-05, 'epoch': 0.7717391304347826}\n", | |
"{'loss': 2.9098974227905274, 'learning_rate': 1.1277173913043479e-05, 'epoch': 0.7744565217391305}\n", | |
"{'loss': 2.992401695251465, 'learning_rate': 1.1141304347826088e-05, 'epoch': 0.7771739130434783}\n", | |
"{'loss': 2.7906036376953125, 'learning_rate': 1.1005434782608696e-05, 'epoch': 0.779891304347826}\n", | |
"{'loss': 2.887853240966797, 'learning_rate': 1.0869565217391305e-05, 'epoch': 0.782608695652174}\n", | |
"{'loss': 2.8920427322387696, 'learning_rate': 1.0733695652173914e-05, 'epoch': 0.7853260869565217}\n", | |
"{'loss': 2.8993629455566405, 'learning_rate': 1.0597826086956523e-05, 'epoch': 0.7880434782608695}\n", | |
"{'loss': 2.9929531097412108, 'learning_rate': 1.0461956521739131e-05, 'epoch': 0.7907608695652174}\n", | |
"{'loss': 2.79913272857666, 'learning_rate': 1.032608695652174e-05, 'epoch': 0.7934782608695652}\n", | |
"{'loss': 2.822602462768555, 'learning_rate': 1.0190217391304347e-05, 'epoch': 0.7961956521739131}\n", | |
"{'loss': 2.8938386917114256, 'learning_rate': 1.0054347826086957e-05, 'epoch': 0.7989130434782609}\n", | |
"{'loss': 2.9126588821411135, 'learning_rate': 9.918478260869566e-06, 'epoch': 0.8016304347826086}\n", | |
"{'loss': 2.9466136932373046, 'learning_rate': 9.782608695652175e-06, 'epoch': 0.8043478260869565}\n", | |
"{'loss': 2.766586685180664, 'learning_rate': 9.646739130434783e-06, 'epoch': 0.8070652173913043}\n", | |
"{'loss': 2.846967315673828, 'learning_rate': 9.510869565217392e-06, 'epoch': 0.8097826086956522}\n", | |
"{'loss': 2.7293128967285156, 'learning_rate': 9.375000000000001e-06, 'epoch': 0.8125}\n", | |
"{'loss': 2.959208297729492, 'learning_rate': 9.239130434782608e-06, 'epoch': 0.8152173913043478}\n", | |
" 82% 6000/7360 [16:09<03:21, 6.74it/s][INFO|trainer.py:1226] 2020-12-31 11:46:12,110 >> Saving model checkpoint to output/nosleep/checkpoint-6000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:46:12,111 >> Configuration saved in output/nosleep/checkpoint-6000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:46:13,862 >> Model weights saved in output/nosleep/checkpoint-6000/pytorch_model.bin\n", | |
"{'loss': 2.9754858016967773, 'learning_rate': 9.103260869565218e-06, 'epoch': 0.8179347826086957}\n", | |
"{'loss': 2.885609817504883, 'learning_rate': 8.967391304347825e-06, 'epoch': 0.8206521739130435}\n", | |
"{'loss': 2.9586454391479493, 'learning_rate': 8.831521739130436e-06, 'epoch': 0.8233695652173914}\n", | |
"{'loss': 2.8784345626831054, 'learning_rate': 8.695652173913044e-06, 'epoch': 0.8260869565217391}\n", | |
"{'loss': 2.879836654663086, 'learning_rate': 8.559782608695653e-06, 'epoch': 0.8288043478260869}\n", | |
"{'loss': 2.8585273742675783, 'learning_rate': 8.423913043478262e-06, 'epoch': 0.8315217391304348}\n", | |
"{'loss': 2.9230180740356446, 'learning_rate': 8.288043478260869e-06, 'epoch': 0.8342391304347826}\n", | |
"{'loss': 2.80902042388916, 'learning_rate': 8.15217391304348e-06, 'epoch': 0.8369565217391305}\n", | |
"{'loss': 2.821381378173828, 'learning_rate': 8.016304347826086e-06, 'epoch': 0.8396739130434783}\n", | |
"{'loss': 2.922434616088867, 'learning_rate': 7.880434782608697e-06, 'epoch': 0.842391304347826}\n", | |
"{'loss': 2.908469390869141, 'learning_rate': 7.744565217391304e-06, 'epoch': 0.845108695652174}\n", | |
"{'loss': 3.046040153503418, 'learning_rate': 7.608695652173914e-06, 'epoch': 0.8478260869565217}\n", | |
"{'loss': 2.9647945404052733, 'learning_rate': 7.472826086956522e-06, 'epoch': 0.8505434782608695}\n", | |
"{'loss': 2.9177480697631837, 'learning_rate': 7.3369565217391315e-06, 'epoch': 0.8532608695652174}\n", | |
"{'loss': 2.9124732971191407, 'learning_rate': 7.201086956521739e-06, 'epoch': 0.8559782608695652}\n", | |
"{'loss': 2.8802576065063477, 'learning_rate': 7.065217391304347e-06, 'epoch': 0.8586956521739131}\n", | |
"{'loss': 2.820792579650879, 'learning_rate': 6.929347826086957e-06, 'epoch': 0.8614130434782609}\n", | |
"{'loss': 2.8953441619873046, 'learning_rate': 6.7934782608695655e-06, 'epoch': 0.8641304347826086}\n", | |
"{'loss': 2.8939170837402344, 'learning_rate': 6.657608695652175e-06, 'epoch': 0.8668478260869565}\n", | |
"{'loss': 2.8508325576782227, 'learning_rate': 6.521739130434783e-06, 'epoch': 0.8695652173913043}\n", | |
"{'loss': 2.891870307922363, 'learning_rate': 6.3858695652173924e-06, 'epoch': 0.8722826086956522}\n", | |
"{'loss': 2.8636974334716796, 'learning_rate': 6.25e-06, 'epoch': 0.875}\n", | |
"{'loss': 2.89998779296875, 'learning_rate': 6.114130434782609e-06, 'epoch': 0.8777173913043478}\n", | |
"{'loss': 2.8457530975341796, 'learning_rate': 5.978260869565218e-06, 'epoch': 0.8804347826086957}\n", | |
"{'loss': 2.912706756591797, 'learning_rate': 5.842391304347826e-06, 'epoch': 0.8831521739130435}\n", | |
" 88% 6500/7360 [17:30<02:09, 6.66it/s][INFO|trainer.py:1226] 2020-12-31 11:47:33,211 >> Saving model checkpoint to output/nosleep/checkpoint-6500\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:47:33,213 >> Configuration saved in output/nosleep/checkpoint-6500/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:47:34,988 >> Model weights saved in output/nosleep/checkpoint-6500/pytorch_model.bin\n", | |
"{'loss': 2.898715782165527, 'learning_rate': 5.706521739130435e-06, 'epoch': 0.8858695652173914}\n", | |
"{'loss': 2.9165903091430665, 'learning_rate': 5.570652173913044e-06, 'epoch': 0.8885869565217391}\n", | |
"{'loss': 2.7092910766601563, 'learning_rate': 5.4347826086956525e-06, 'epoch': 0.8913043478260869}\n", | |
"{'loss': 2.830535888671875, 'learning_rate': 5.298913043478261e-06, 'epoch': 0.8940217391304348}\n", | |
"{'loss': 2.9604679107666017, 'learning_rate': 5.16304347826087e-06, 'epoch': 0.8967391304347826}\n", | |
"{'loss': 2.933907699584961, 'learning_rate': 5.027173913043479e-06, 'epoch': 0.8994565217391305}\n", | |
"{'loss': 2.8418628692626955, 'learning_rate': 4.891304347826087e-06, 'epoch': 0.9021739130434783}\n", | |
"{'loss': 2.844557189941406, 'learning_rate': 4.755434782608696e-06, 'epoch': 0.904891304347826}\n", | |
"{'loss': 2.9896066665649412, 'learning_rate': 4.619565217391304e-06, 'epoch': 0.907608695652174}\n", | |
"{'loss': 2.973470687866211, 'learning_rate': 4.483695652173913e-06, 'epoch': 0.9103260869565217}\n", | |
"{'loss': 2.9425249099731445, 'learning_rate': 4.347826086956522e-06, 'epoch': 0.9130434782608695}\n", | |
"{'loss': 2.849333381652832, 'learning_rate': 4.211956521739131e-06, 'epoch': 0.9157608695652174}\n", | |
"{'loss': 2.9857072830200195, 'learning_rate': 4.07608695652174e-06, 'epoch': 0.9184782608695652}\n", | |
"{'loss': 2.958251953125, 'learning_rate': 3.940217391304348e-06, 'epoch': 0.9211956521739131}\n", | |
"{'loss': 2.8145523071289062, 'learning_rate': 3.804347826086957e-06, 'epoch': 0.9239130434782609}\n", | |
"{'loss': 2.7729000091552733, 'learning_rate': 3.6684782608695657e-06, 'epoch': 0.9266304347826086}\n", | |
"{'loss': 2.9284555435180666, 'learning_rate': 3.5326086956521736e-06, 'epoch': 0.9293478260869565}\n", | |
"{'loss': 2.8683935165405274, 'learning_rate': 3.3967391304347827e-06, 'epoch': 0.9320652173913043}\n", | |
"{'loss': 2.913663864135742, 'learning_rate': 3.2608695652173914e-06, 'epoch': 0.9347826086956522}\n", | |
"{'loss': 2.940004348754883, 'learning_rate': 3.125e-06, 'epoch': 0.9375}\n", | |
"{'loss': 2.8404977798461912, 'learning_rate': 2.989130434782609e-06, 'epoch': 0.9402173913043478}\n", | |
"{'loss': 2.9072927474975585, 'learning_rate': 2.8532608695652176e-06, 'epoch': 0.9429347826086957}\n", | |
"{'loss': 2.956881523132324, 'learning_rate': 2.7173913043478263e-06, 'epoch': 0.9456521739130435}\n", | |
"{'loss': 2.9157276153564453, 'learning_rate': 2.581521739130435e-06, 'epoch': 0.9483695652173914}\n", | |
"{'loss': 2.7884153366088866, 'learning_rate': 2.4456521739130437e-06, 'epoch': 0.9510869565217391}\n", | |
" 95% 7000/7360 [18:51<00:54, 6.62it/s][INFO|trainer.py:1226] 2020-12-31 11:48:53,869 >> Saving model checkpoint to output/nosleep/checkpoint-7000\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:48:53,871 >> Configuration saved in output/nosleep/checkpoint-7000/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:48:55,423 >> Model weights saved in output/nosleep/checkpoint-7000/pytorch_model.bin\n", | |
"{'loss': 2.8185762405395507, 'learning_rate': 2.309782608695652e-06, 'epoch': 0.9538043478260869}\n", | |
"{'loss': 2.95583553314209, 'learning_rate': 2.173913043478261e-06, 'epoch': 0.9565217391304348}\n", | |
"{'loss': 2.8609878540039064, 'learning_rate': 2.03804347826087e-06, 'epoch': 0.9592391304347826}\n", | |
"{'loss': 2.9434335708618162, 'learning_rate': 1.9021739130434785e-06, 'epoch': 0.9619565217391305}\n", | |
"{'loss': 2.781317138671875, 'learning_rate': 1.7663043478260868e-06, 'epoch': 0.9646739130434783}\n", | |
"{'loss': 2.8071691513061525, 'learning_rate': 1.6304347826086957e-06, 'epoch': 0.967391304347826}\n", | |
"{'loss': 2.9390436172485352, 'learning_rate': 1.4945652173913044e-06, 'epoch': 0.970108695652174}\n", | |
"{'loss': 2.787994384765625, 'learning_rate': 1.3586956521739131e-06, 'epoch': 0.9728260869565217}\n", | |
"{'loss': 2.919297790527344, 'learning_rate': 1.2228260869565218e-06, 'epoch': 0.9755434782608695}\n", | |
"{'loss': 2.9832035064697267, 'learning_rate': 1.0869565217391306e-06, 'epoch': 0.9782608695652174}\n", | |
"{'loss': 2.8524606704711912, 'learning_rate': 9.510869565217393e-07, 'epoch': 0.9809782608695652}\n", | |
"{'loss': 2.9475589752197267, 'learning_rate': 8.152173913043479e-07, 'epoch': 0.9836956521739131}\n", | |
"{'loss': 2.798427772521973, 'learning_rate': 6.793478260869566e-07, 'epoch': 0.9864130434782609}\n", | |
"{'loss': 2.921639823913574, 'learning_rate': 5.434782608695653e-07, 'epoch': 0.9891304347826086}\n", | |
"{'loss': 2.993421936035156, 'learning_rate': 4.0760869565217393e-07, 'epoch': 0.9918478260869565}\n", | |
"{'loss': 2.7664459228515623, 'learning_rate': 2.7173913043478264e-07, 'epoch': 0.9945652173913043}\n", | |
"{'loss': 2.8903715133666994, 'learning_rate': 1.3586956521739132e-07, 'epoch': 0.9972826086956522}\n", | |
"{'loss': 2.9647308349609376, 'learning_rate': 0.0, 'epoch': 1.0}\n", | |
"100% 7360/7360 [19:51<00:00, 6.65it/s][INFO|trainer.py:862] 2020-12-31 11:49:53,665 >> \n", | |
"\n", | |
"Training completed. Do not forget to share your model on huggingface.co/models =)\n", | |
"\n", | |
"\n", | |
"{'epoch': 1.0}\n", | |
"100% 7360/7360 [19:51<00:00, 6.18it/s]\n", | |
"[INFO|trainer.py:1226] 2020-12-31 11:49:53,713 >> Saving model checkpoint to output/nosleep\n", | |
"[INFO|configuration_utils.py:289] 2020-12-31 11:49:53,715 >> Configuration saved in output/nosleep/config.json\n", | |
"[INFO|modeling_utils.py:814] 2020-12-31 11:49:55,433 >> Model weights saved in output/nosleep/pytorch_model.bin\n", | |
"12/31/2020 11:49:55 - INFO - __main__ - ***** Train results *****\n", | |
"Traceback (most recent call last):\n", | |
" File \"run_clm.py\", line 385, in <module>\n", | |
" main()\n", | |
" File \"run_clm.py\", line 351, in main\n", | |
" for key, value in sorted(train_result.metrics.items()):\n", | |
"AttributeError: 'TrainOutput' object has no attribute 'metrics'\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "r2CLOnxXsP8U" | |
}, | |
"source": [ | |
"### Giving some input" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rsb1pSFYgXm7" | |
}, | |
"source": [ | |
"SENTENCES = [\"It was midnight when I\"]" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ZBfQVBDag1ut", | |
"outputId": "6a2069b2-d881-485f-e39e-114b036a696e" | |
}, | |
"source": [ | |
"import random\r\n", | |
"seed = random.randint(0, 2**32-1)\r\n", | |
"seed" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"2226218420" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 52 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-IndXjN1sVxL" | |
}, | |
"source": [ | |
"### Generating samples" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ufqVA9IXg27_", | |
"outputId": "8c0f14e4-f775-4e55-9e9f-4d94299e3bfa" | |
}, | |
"source": [ | |
"examples = []\r\n", | |
"num_return_sequences = 5\r\n", | |
"\r\n", | |
"for start in SENTENCES:\r\n", | |
" val = !python run_generation.py \\\r\n", | |
" --model_type gpt2 \\\r\n", | |
" --model_name_or_path output/$sub \\\r\n", | |
" --length 160 \\\r\n", | |
" --num_return_sequences $num_return_sequences \\\r\n", | |
" --temperature 0.5 \\\r\n", | |
" --p 0.95 \\\r\n", | |
" --seed $seed \\\r\n", | |
" --prompt {'\"<|endoftext|>' + start + '\"'}\r\n", | |
" generated = [val[-1-2*k] for k in range(num_return_sequences)[::-1]]\r\n", | |
" print(f'\\nStart of sentence: {start}')\r\n", | |
" for i, g in enumerate(generated):\r\n", | |
" g = g.replace('<|endoftext|>', '')\r\n", | |
" print(f'* Generated #{i+1}: {g}')" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"Start of sentence: It was midnight when I\n", | |
"* Generated #1: It was midnight when I arrived home from work. My wife and I had just gotten a new car. We had been spending most of our time at the local park and the time we spent in the park was spent in the park. I’d spent a lot of time there, getting to know the people, the parks, the people who lived in the park. I had been to the park before, but I had never seen anything like it. I was about to leave when I heard a knock on the door. “Come in!” I called out. I was about to jump out when I heard a knock on the door. “Hello?” I asked. “Oh, hello?” “You’re not in\n", | |
"* Generated #2: It was midnight when I woke up. I was in a hospital bed, but the doctors had not seen me for several days. I had been in a coma for about two weeks. I had been in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I was in a coma for about two weeks. I\n", | |
"* Generated #3: It was midnight when I woke up. I had to go to bed, but I didn’t want to go to sleep. I looked up at the ceiling, hoping I’d wake up. I was still awake, but I felt a little weak. I wanted to go to sleep, but I didn’t know what to do. I tried to scream, but I couldn’t. I tried to wake up, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I couldn’t. I tried to go to sleep, but I\n", | |
"* Generated #4: It was midnight when I got home from work. I was going to go check on my wife, but she was gone. I was going to go check on my daughter. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter, but she was gone. I went to check on my daughter,\n", | |
"* Generated #5: It was midnight when I woke up. My bedroom was dark, but I was awake. I could hear the faint thumping of a doorbell. I turned to look, but it was gone. I looked again, and there was nothing. The door was open, but I couldn’t see anything. I walked to the door, but it was locked. I got up, and walked to the bed. It was empty, but I could see a figure sitting on the bed. I looked up, and saw that it was the same one I’d seen in the house. I opened the door, and it was gone. I looked around again, and I saw that it was gone too. I walked to the door again, and saw that it was locked. I walked to the door again,\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment