exBERT-transformers sample training results
(exbert-transformers) root@jupyter-beomi:~/exbert-transformers/examples/pytorch/language-modeling# ./exbert_pretrain.sh
2021-06-10 05:57:55.339746: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-06-10 05:57:55.339796: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
06/10/2021 05:57:58 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
06/10/2021 05:57:58 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.NO,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
greater_is_better=None,
group_by_length=False,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_on_each_node=True,
logging_dir=runs/Jun10_05-57-58_jupyter-beomi,
logging_first_step=True,
logging_steps=500,
logging_strategy=IntervalStrategy.STEPS,
lr_scheduler_type=SchedulerType.LINEAR,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=None,
mp_parameters=,
no_cuda=False,
num_train_epochs=1.0,
output_dir=./exbert-mlm,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=8,
prediction_loss_only=False,
push_to_hub=False,
remove_unused_columns=True,
report_to=['tensorboard'],
resume_from_checkpoint=None,
run_name=./exbert-mlm,
save_steps=500,
save_strategy=IntervalStrategy.STEPS,
save_total_limit=None,
seed=42,
sharded_ddp=[],
skip_memory_metrics=True,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_legacy_prediction_loop=False,
warmup_ratio=0.0,
warmup_steps=0,
weight_decay=0.0,
)
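Note: the dump above is just TrainingArguments as printed by the Trainer. A minimal sketch of building an equivalent object in Python, copying the values visible in the printout and leaving everything else at the library defaults:

    from transformers import TrainingArguments

    # Values copied from the argument dump above; all other fields
    # keep their transformers defaults.
    training_args = TrainingArguments(
        output_dir="./exbert-mlm",
        do_train=True,
        num_train_epochs=1.0,
        per_device_train_batch_size=8,
        learning_rate=5e-05,
        logging_first_step=True,
        seed=42,
    )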
06/10/2021 05:57:59 - WARNING - datasets.builder - Using custom data configuration default-477b0faef5910d5a
Downloading and preparing dataset text/default (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /home/jovyan/.cache/huggingface/datasets/text/default-477b0faef5910d5a/0.0.0/e16f44aa1b321ece1f87b07977cc5d70be93d69b20486d6dacd62e12cf25c9a5...
Dataset text downloaded and prepared to /home/jovyan/.cache/huggingface/datasets/text/default-477b0faef5910d5a/0.0.0/e16f44aa1b321ece1f87b07977cc5d70be93d69b20486d6dacd62e12cf25c9a5. Subsequent calls will reuse this data.
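Note: the two datasets lines above come from the generic "text" loader. A hedged sketch of the same step in isolation — the actual training file used in this run is not shown in the log, so "corpus.txt" below is a placeholder:

    from datasets import load_dataset

    # Each line of the plain-text file becomes one "text" example.
    raw_datasets = load_dataset("text", data_files={"train": "corpus.txt"})
    print(raw_datasets["train"][0])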
06/10/2021 05:57:59 - WARNING - __main__ - You are instantiating a new config instance from scratch.
[INFO|tokenization_utils_base.py:1651] 2021-06-10 05:57:59,979 >> Didn't find file exbert/added_tokens.json. We won't load it.
[INFO|tokenization_utils_base.py:1651] 2021-06-10 05:57:59,980 >> Didn't find file exbert/special_tokens_map.json. We won't load it.
[INFO|tokenization_utils_base.py:1651] 2021-06-10 05:57:59,980 >> Didn't find file exbert/tokenizer_config.json. We won't load it.
[INFO|tokenization_utils_base.py:1651] 2021-06-10 05:57:59,980 >> Didn't find file exbert/tokenizer.json. We won't load it.
[INFO|tokenization_utils_base.py:1715] 2021-06-10 05:57:59,981 >> loading file exbert/vocab.txt
[INFO|tokenization_utils_base.py:1715] 2021-06-10 05:57:59,981 >> loading file None
[INFO|tokenization_utils_base.py:1715] 2021-06-10 05:57:59,981 >> loading file None
[INFO|tokenization_utils_base.py:1715] 2021-06-10 05:57:59,981 >> loading file None
[INFO|tokenization_utils_base.py:1715] 2021-06-10 05:57:59,981 >> loading file None
06/10/2021 05:58:00 - INFO - __main__ - Training new model from scratch
[INFO|configuration_utils.py:517] 2021-06-10 05:58:03,805 >> loading configuration file https://huggingface.co/beomi/kcbert-base/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/10de039f2f91b0c6fbd30fad5bf8a7468a20701212ed12f9f5e610edb99c55d1.d8a72131e15fd1d856f1b39abf4eff31d458aeeca0a4192df898ca699ec7d779
[INFO|configuration_utils.py:553] 2021-06-10 05:58:03,807 >> Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 300,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.7.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30000
}
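Note: the config above is pulled from the beomi/kcbert-base repo on the Hub. A short sketch of fetching the same configuration with the stock transformers API (not the exBERT-specific code path):

    from transformers import BertConfig

    config = BertConfig.from_pretrained("beomi/kcbert-base")
    print(config.vocab_size)                # 30000, as in the dump above
    print(config.max_position_embeddings)   # 300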
[INFO|modeling_utils.py:1155] 2021-06-10 05:58:04,609 >> loading weights file https://huggingface.co/beomi/kcbert-base/resolve/main/pytorch_model.bin from cache at /home/jovyan/.cache/huggingface/transformers/1c204bf1f008ee734eeb5ce678b148d14fa298802ce16d879c92a22a52527a0e.6cdf570ee57a7f6a5c727c436a4c26d8e9601ddaa1377ebcb16b7285d76125cd
[WARNING|modeling_utils.py:1330] 2021-06-10 05:58:07,188 >> Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[INFO|modeling_utils.py:1347] 2021-06-10 05:58:07,188 >> All the weights of BertModel were initialized from the model checkpoint at beomi/kcbert-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertModel for predictions without further training.
100%|████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00, 1.40ba/s]
100%|████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00, 3.25ba/s]
[INFO|trainer.py:514] 2021-06-10 05:58:17,960 >> The following columns in the training set don't have a corresponding argument in `exBertForMaskedLM.forward` and have been ignored: special_tokens_mask.
[INFO|trainer.py:1147] 2021-06-10 05:58:17,976 >> ***** Running training *****
[INFO|trainer.py:1148] 2021-06-10 05:58:17,976 >> Num examples = 452
[INFO|trainer.py:1149] 2021-06-10 05:58:17,976 >> Num Epochs = 1
[INFO|trainer.py:1150] 2021-06-10 05:58:17,976 >> Instantaneous batch size per device = 8
[INFO|trainer.py:1151] 2021-06-10 05:58:17,976 >> Total train batch size (w. parallel, distributed & accumulation) = 8
[INFO|trainer.py:1152] 2021-06-10 05:58:17,976 >> Gradient Accumulation steps = 1
[INFO|trainer.py:1153] 2021-06-10 05:58:17,976 >> Total optimization steps = 57
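Note: the step count follows directly from the numbers above. With 452 examples, an effective batch size of 8 (one device, no gradient accumulation), and one epoch, the Trainer runs ceil(452 / 8) = 57 optimization steps; the last batch holds only 4 examples.

    import math

    # 452 training examples / effective batch size 8, rounded up
    assert math.ceil(452 / 8) == 57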
{'loss': 10.7157, 'learning_rate': 4.912280701754386e-05, 'epoch': 0.02}
100%|██████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:18<00:00, 3.41it/s][INFO|trainer.py:1343] 2021-06-10 05:58:36,148 >>
Training completed. Do not forget to share your model on huggingface.co/models =)
{'train_runtime': 18.1725, 'train_samples_per_second': 24.873, 'train_steps_per_second': 3.137, 'train_loss': 5.573559560273823, 'epoch': 1.0}
100%|██████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:18<00:00, 3.14it/s]
[INFO|trainer.py:1894] 2021-06-10 05:58:36,152 >> Saving model checkpoint to ./exbert-mlm
[INFO|configuration_utils.py:351] 2021-06-10 05:58:36,155 >> Configuration saved in ./exbert-mlm/config.json
[INFO|modeling_utils.py:889] 2021-06-10 05:58:37,568 >> Model weights saved in ./exbert-mlm/pytorch_model.bin
[INFO|tokenization_utils_base.py:1924] 2021-06-10 05:58:37,571 >> tokenizer config file saved in ./exbert-mlm/tokenizer_config.json
[INFO|tokenization_utils_base.py:1930] 2021-06-10 05:58:37,573 >> Special tokens file saved in ./exbert-mlm/special_tokens_map.json
[INFO|trainer_pt_utils.py:907] 2021-06-10 05:58:37,617 >> ***** train metrics *****
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,617 >> epoch = 1.0
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,617 >> train_loss = 5.5736
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,617 >> train_runtime = 0:00:18.17
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,618 >> train_samples = 452
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,618 >> train_samples_per_second = 24.873
[INFO|trainer_pt_utils.py:912] 2021-06-10 05:58:37,618 >> train_steps_per_second = 3.137
(exbert-transformers) root@jupyter-beomi:~/exbert-transformers/examples/pytorch/language-modeling#
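Note: the "Saving model checkpoint" lines above write a standard transformers checkpoint to ./exbert-mlm. A sketch of reloading it afterwards — the run itself used a custom exBertForMaskedLM class, so loading with the stock BertForMaskedLM here is an assumption that the saved config is BERT-compatible, not what the training script does:

    from transformers import BertForMaskedLM, BertTokenizer

    # Reload the artifacts written to ./exbert-mlm at the end of training.
    model = BertForMaskedLM.from_pretrained("./exbert-mlm")
    tokenizer = BertTokenizer.from_pretrained("./exbert-mlm")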