Last active
June 28, 2023 14:56
-
-
Save Tostino/8b2c39e2c8b7120b2336d21e0c4458af to your computer and use it in GitHub Desktop.
training state
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"log_history": [ | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.6364, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.7064, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.6315, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.4869, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.574, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.5204, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.4908, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.5431, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.3717, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.3315, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.5871, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.6565, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.3501, | |
"step": 65 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.4411, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 5e-05, | |
"loss": 1.3569, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3382, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4924, | |
"step": 85 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.36, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4402, | |
"step": 95 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3073, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.5564, | |
"step": 105 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3551, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4532, | |
"step": 115 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4447, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3871, | |
"step": 125 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4453, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3914, | |
"step": 135 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4736, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3323, | |
"step": 145 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.351, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.506, | |
"step": 155 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.5272, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4713, | |
"step": 165 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3128, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4543, | |
"step": 175 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3619, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.2431, | |
"step": 185 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4737, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3682, | |
"step": 195 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4169, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4997, | |
"step": 205 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.5166, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4882, | |
"step": 215 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.4921, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3296, | |
"step": 225 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 5e-05, | |
"loss": 1.3964, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3418, | |
"step": 235 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4531, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4157, | |
"step": 245 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.389, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.5122, | |
"step": 255 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4294, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3851, | |
"step": 265 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4069, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4224, | |
"step": 275 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.1366, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2522, | |
"step": 285 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4021, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2508, | |
"step": 295 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3009, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.413, | |
"step": 305 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4812, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4051, | |
"step": 315 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2753, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.5075, | |
"step": 325 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2422, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2314, | |
"step": 335 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.2432, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4572, | |
"step": 345 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.4642, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.5075, | |
"step": 355 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.5003, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3433, | |
"step": 365 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3249, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3902, | |
"step": 375 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.3914, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 5e-05, | |
"loss": 1.246, | |
"step": 385 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3373, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3622, | |
"step": 395 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.4186, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.03, | |
"eval_loss": 1.3826260566711426, | |
"eval_runtime": 172.7593, | |
"eval_samples_per_second": 0.37, | |
"eval_steps_per_second": 0.37, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3109, | |
"step": 405 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.538, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.4145, | |
"step": 415 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3053, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2739, | |
"step": 425 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3654, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2727, | |
"step": 435 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.43, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3069, | |
"step": 445 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2252, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.473, | |
"step": 455 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3879, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2748, | |
"step": 465 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3569, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2458, | |
"step": 475 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2665, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3477, | |
"step": 485 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2846, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2871, | |
"step": 495 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.304, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.4381, | |
"step": 505 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.4845, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.4714, | |
"step": 515 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3786, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3275, | |
"step": 525 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2829, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.3126, | |
"step": 535 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 5e-05, | |
"loss": 1.2801, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.0625, | |
"step": 545 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2973, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3612, | |
"step": 555 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.5801, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3131, | |
"step": 565 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3765, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.4223, | |
"step": 575 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3258, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.1761, | |
"step": 585 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3303, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3449, | |
"step": 595 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2357, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3934, | |
"step": 605 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2937, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.433, | |
"step": 615 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2455, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3202, | |
"step": 625 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3447, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2982, | |
"step": 635 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3978, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2515, | |
"step": 645 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3887, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.351, | |
"step": 655 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.4711, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3074, | |
"step": 665 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.4161, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.374, | |
"step": 675 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3812, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2492, | |
"step": 685 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.2655, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 5e-05, | |
"loss": 1.3438, | |
"step": 695 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2679, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3636, | |
"step": 705 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2089, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3814, | |
"step": 715 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3618, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2611, | |
"step": 725 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2884, | |
"step": 730 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3621, | |
"step": 735 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.4313, | |
"step": 740 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.4075, | |
"step": 745 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2317, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3491, | |
"step": 755 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3191, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3743, | |
"step": 765 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3254, | |
"step": 770 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3155, | |
"step": 775 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.1721, | |
"step": 780 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2114, | |
"step": 785 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2806, | |
"step": 790 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3613, | |
"step": 795 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3065, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.05, | |
"eval_loss": 1.348414421081543, | |
"eval_runtime": 170.8505, | |
"eval_samples_per_second": 0.375, | |
"eval_steps_per_second": 0.375, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3469, | |
"step": 805 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3572, | |
"step": 810 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.251, | |
"step": 815 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.4122, | |
"step": 820 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2204, | |
"step": 825 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2749, | |
"step": 830 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3343, | |
"step": 835 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2602, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.2729, | |
"step": 845 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 5e-05, | |
"loss": 1.3178, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4804, | |
"step": 855 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.5087, | |
"step": 860 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3976, | |
"step": 865 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.221, | |
"step": 870 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3278, | |
"step": 875 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3183, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.239, | |
"step": 885 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3092, | |
"step": 890 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2971, | |
"step": 895 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3104, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3702, | |
"step": 905 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.5279, | |
"step": 910 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2468, | |
"step": 915 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3806, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2312, | |
"step": 925 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4581, | |
"step": 930 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.1727, | |
"step": 935 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3276, | |
"step": 940 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2615, | |
"step": 945 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2483, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.5079, | |
"step": 955 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.545, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4007, | |
"step": 965 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4795, | |
"step": 970 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4063, | |
"step": 975 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3431, | |
"step": 980 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.3488, | |
"step": 985 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.1584, | |
"step": 990 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4187, | |
"step": 995 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.2184, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 1.4363, | |
"step": 1005 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3427, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3033, | |
"step": 1015 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2708, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3664, | |
"step": 1025 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.292, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.4647, | |
"step": 1035 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3015, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3944, | |
"step": 1045 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2811, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3424, | |
"step": 1055 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3346, | |
"step": 1060 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.4237, | |
"step": 1065 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.1834, | |
"step": 1070 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3486, | |
"step": 1075 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 0.9791, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3935, | |
"step": 1085 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2381, | |
"step": 1090 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3597, | |
"step": 1095 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.1904, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.4621, | |
"step": 1105 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3123, | |
"step": 1110 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.3973, | |
"step": 1115 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.5139, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.338, | |
"step": 1125 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2288, | |
"step": 1130 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2593, | |
"step": 1135 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2298, | |
"step": 1140 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.1803, | |
"step": 1145 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.1293, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.4373, | |
"step": 1155 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2912, | |
"step": 1160 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 5e-05, | |
"loss": 1.2718, | |
"step": 1165 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3882, | |
"step": 1170 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1879, | |
"step": 1175 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1322, | |
"step": 1180 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1278, | |
"step": 1185 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2548, | |
"step": 1190 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2121, | |
"step": 1195 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3013, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.08, | |
"eval_loss": 1.3256827592849731, | |
"eval_runtime": 172.0592, | |
"eval_samples_per_second": 0.372, | |
"eval_steps_per_second": 0.372, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3047, | |
"step": 1205 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.0809, | |
"step": 1210 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3652, | |
"step": 1215 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.4293, | |
"step": 1220 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2539, | |
"step": 1225 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3585, | |
"step": 1230 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2909, | |
"step": 1235 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1438, | |
"step": 1240 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3345, | |
"step": 1245 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2577, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3531, | |
"step": 1255 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2548, | |
"step": 1260 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2895, | |
"step": 1265 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2503, | |
"step": 1270 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.4737, | |
"step": 1275 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1839, | |
"step": 1280 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1898, | |
"step": 1285 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.137, | |
"step": 1290 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.1937, | |
"step": 1295 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2237, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3925, | |
"step": 1305 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.4801, | |
"step": 1310 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.2994, | |
"step": 1315 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 5e-05, | |
"loss": 1.3229, | |
"step": 1320 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2441, | |
"step": 1325 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1524, | |
"step": 1330 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1314, | |
"step": 1335 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3781, | |
"step": 1340 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3092, | |
"step": 1345 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2591, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4065, | |
"step": 1355 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4643, | |
"step": 1360 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2354, | |
"step": 1365 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.357, | |
"step": 1370 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2665, | |
"step": 1375 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2324, | |
"step": 1380 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2895, | |
"step": 1385 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1987, | |
"step": 1390 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3698, | |
"step": 1395 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4526, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3736, | |
"step": 1405 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.5307, | |
"step": 1410 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2588, | |
"step": 1415 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2663, | |
"step": 1420 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3133, | |
"step": 1425 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.252, | |
"step": 1430 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1154, | |
"step": 1435 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1828, | |
"step": 1440 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1649, | |
"step": 1445 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4475, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4875, | |
"step": 1455 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.2571, | |
"step": 1460 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.4193, | |
"step": 1465 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.3211, | |
"step": 1470 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 5e-05, | |
"loss": 1.1705, | |
"step": 1475 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.1856, | |
"step": 1480 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.1191, | |
"step": 1485 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.408, | |
"step": 1490 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2634, | |
"step": 1495 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.1894, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3645, | |
"step": 1505 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.4673, | |
"step": 1510 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3525, | |
"step": 1515 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3761, | |
"step": 1520 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.225, | |
"step": 1525 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2653, | |
"step": 1530 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.1734, | |
"step": 1535 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2575, | |
"step": 1540 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2005, | |
"step": 1545 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2282, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3581, | |
"step": 1555 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3222, | |
"step": 1560 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.324, | |
"step": 1565 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.279, | |
"step": 1570 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3586, | |
"step": 1575 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.0479, | |
"step": 1580 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2811, | |
"step": 1585 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2678, | |
"step": 1590 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.052, | |
"step": 1595 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.5245, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_loss": 1.2858082056045532, | |
"eval_runtime": 170.4381, | |
"eval_samples_per_second": 0.376, | |
"eval_steps_per_second": 0.376, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2291, | |
"step": 1605 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.3934, | |
"step": 1610 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2201, | |
"step": 1615 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2251, | |
"step": 1620 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.2646, | |
"step": 1625 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 5e-05, | |
"loss": 1.1389, | |
"step": 1630 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1606, | |
"step": 1635 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2796, | |
"step": 1640 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1089, | |
"step": 1645 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3718, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3363, | |
"step": 1655 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2232, | |
"step": 1660 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1583, | |
"step": 1665 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.25, | |
"step": 1670 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1766, | |
"step": 1675 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1272, | |
"step": 1680 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3441, | |
"step": 1685 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1819, | |
"step": 1690 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.228, | |
"step": 1695 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1463, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3071, | |
"step": 1705 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.4164, | |
"step": 1710 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.0774, | |
"step": 1715 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3663, | |
"step": 1720 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.32, | |
"step": 1725 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3161, | |
"step": 1730 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1685, | |
"step": 1735 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3079, | |
"step": 1740 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2392, | |
"step": 1745 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2128, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.4134, | |
"step": 1755 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.4144, | |
"step": 1760 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2784, | |
"step": 1765 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.1817, | |
"step": 1770 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3256, | |
"step": 1775 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.3828, | |
"step": 1780 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 5e-05, | |
"loss": 1.2254, | |
"step": 1785 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2472, | |
"step": 1790 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1886, | |
"step": 1795 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.208, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.3732, | |
"step": 1805 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.4111, | |
"step": 1810 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.4227, | |
"step": 1815 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2345, | |
"step": 1820 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1606, | |
"step": 1825 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2977, | |
"step": 1830 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.402, | |
"step": 1835 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2448, | |
"step": 1840 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.198, | |
"step": 1845 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1034, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.4797, | |
"step": 1855 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2847, | |
"step": 1860 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.3228, | |
"step": 1865 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.4687, | |
"step": 1870 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1948, | |
"step": 1875 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2475, | |
"step": 1880 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.4383, | |
"step": 1885 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.3444, | |
"step": 1890 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 0.9692, | |
"step": 1895 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2756, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.3158, | |
"step": 1905 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.387, | |
"step": 1910 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.3715, | |
"step": 1915 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1022, | |
"step": 1920 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2917, | |
"step": 1925 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2714, | |
"step": 1930 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.2415, | |
"step": 1935 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 5e-05, | |
"loss": 1.1785, | |
"step": 1940 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 0.9969, | |
"step": 1945 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2847, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.4457, | |
"step": 1955 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3085, | |
"step": 1960 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3594, | |
"step": 1965 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3729, | |
"step": 1970 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3379, | |
"step": 1975 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2902, | |
"step": 1980 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.0801, | |
"step": 1985 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.286, | |
"step": 1990 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.4893, | |
"step": 1995 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2891, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.13, | |
"eval_loss": 1.2583482265472412, | |
"eval_runtime": 170.8925, | |
"eval_samples_per_second": 0.375, | |
"eval_steps_per_second": 0.375, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3182, | |
"step": 2005 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2123, | |
"step": 2010 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2642, | |
"step": 2015 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3112, | |
"step": 2020 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3857, | |
"step": 2025 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2618, | |
"step": 2030 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2141, | |
"step": 2035 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3295, | |
"step": 2040 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.1594, | |
"step": 2045 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2853, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.391, | |
"step": 2055 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2026, | |
"step": 2060 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.1922, | |
"step": 2065 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.1181, | |
"step": 2070 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.1812, | |
"step": 2075 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2093, | |
"step": 2080 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.1904, | |
"step": 2085 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.3221, | |
"step": 2090 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 5e-05, | |
"loss": 1.2588, | |
"step": 2095 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2346, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.0474, | |
"step": 2105 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2424, | |
"step": 2110 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2811, | |
"step": 2115 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1532, | |
"step": 2120 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1884, | |
"step": 2125 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.0585, | |
"step": 2130 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3244, | |
"step": 2135 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2004, | |
"step": 2140 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3156, | |
"step": 2145 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2088, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.4366, | |
"step": 2155 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3323, | |
"step": 2160 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3196, | |
"step": 2165 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.0828, | |
"step": 2170 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3251, | |
"step": 2175 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2341, | |
"step": 2180 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2151, | |
"step": 2185 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1872, | |
"step": 2190 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1876, | |
"step": 2195 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1513, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3805, | |
"step": 2205 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.31, | |
"step": 2210 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2971, | |
"step": 2215 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.225, | |
"step": 2220 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.2219, | |
"step": 2225 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1819, | |
"step": 2230 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1366, | |
"step": 2235 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1346, | |
"step": 2240 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.3934, | |
"step": 2245 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 5e-05, | |
"loss": 1.1795, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.4171, | |
"step": 2255 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2584, | |
"step": 2260 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1782, | |
"step": 2265 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2334, | |
"step": 2270 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2925, | |
"step": 2275 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3983, | |
"step": 2280 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1496, | |
"step": 2285 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2801, | |
"step": 2290 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3062, | |
"step": 2295 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2909, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3861, | |
"step": 2305 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.0324, | |
"step": 2310 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3305, | |
"step": 2315 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1912, | |
"step": 2320 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1472, | |
"step": 2325 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3262, | |
"step": 2330 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1615, | |
"step": 2335 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1987, | |
"step": 2340 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3456, | |
"step": 2345 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.0409, | |
"step": 2350 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2478, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.4118, | |
"step": 2360 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2323, | |
"step": 2365 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.242, | |
"step": 2370 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.281, | |
"step": 2375 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2435, | |
"step": 2380 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.3362, | |
"step": 2385 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2595, | |
"step": 2390 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1774, | |
"step": 2395 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.1636, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 0.15, | |
"eval_loss": 1.2436387538909912, | |
"eval_runtime": 173.2641, | |
"eval_samples_per_second": 0.369, | |
"eval_steps_per_second": 0.369, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 5e-05, | |
"loss": 1.2307, | |
"step": 2405 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1783, | |
"step": 2410 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1864, | |
"step": 2415 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2467, | |
"step": 2420 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.22, | |
"step": 2425 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.182, | |
"step": 2430 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.0839, | |
"step": 2435 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1556, | |
"step": 2440 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2305, | |
"step": 2445 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1858, | |
"step": 2450 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2597, | |
"step": 2455 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.3852, | |
"step": 2460 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2533, | |
"step": 2465 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2725, | |
"step": 2470 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.3891, | |
"step": 2475 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2617, | |
"step": 2480 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.302, | |
"step": 2485 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.0664, | |
"step": 2490 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1244, | |
"step": 2495 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2648, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.3193, | |
"step": 2505 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.3719, | |
"step": 2510 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1923, | |
"step": 2515 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2426, | |
"step": 2520 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1807, | |
"step": 2525 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.2305, | |
"step": 2530 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1668, | |
"step": 2535 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1423, | |
"step": 2540 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.0657, | |
"step": 2545 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.1661, | |
"step": 2550 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.3593, | |
"step": 2555 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 5e-05, | |
"loss": 1.0947, | |
"step": 2560 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2497, | |
"step": 2565 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.3798, | |
"step": 2570 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2173, | |
"step": 2575 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2431, | |
"step": 2580 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 0.9598, | |
"step": 2585 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1198, | |
"step": 2590 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.3081, | |
"step": 2595 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2587, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.5154, | |
"step": 2605 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.3393, | |
"step": 2610 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1717, | |
"step": 2615 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1557, | |
"step": 2620 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.3518, | |
"step": 2625 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.0818, | |
"step": 2630 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1931, | |
"step": 2635 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1297, | |
"step": 2640 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.0826, | |
"step": 2645 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.3517, | |
"step": 2650 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.4603, | |
"step": 2655 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.0869, | |
"step": 2660 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1025, | |
"step": 2665 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.0751, | |
"step": 2670 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2981, | |
"step": 2675 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2074, | |
"step": 2680 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.053, | |
"step": 2685 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1748, | |
"step": 2690 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.0686, | |
"step": 2695 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.1607, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.2156, | |
"step": 2705 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.395, | |
"step": 2710 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 5e-05, | |
"loss": 1.144, | |
"step": 2715 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3105, | |
"step": 2720 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2532, | |
"step": 2725 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2659, | |
"step": 2730 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1705, | |
"step": 2735 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3798, | |
"step": 2740 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1516, | |
"step": 2745 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2663, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2434, | |
"step": 2755 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3825, | |
"step": 2760 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1639, | |
"step": 2765 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1089, | |
"step": 2770 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1156, | |
"step": 2775 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.311, | |
"step": 2780 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3591, | |
"step": 2785 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3305, | |
"step": 2790 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.0934, | |
"step": 2795 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2301, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 0.18, | |
"eval_loss": 1.2233420610427856, | |
"eval_runtime": 170.1855, | |
"eval_samples_per_second": 0.376, | |
"eval_steps_per_second": 0.376, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3035, | |
"step": 2805 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2983, | |
"step": 2810 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1374, | |
"step": 2815 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1257, | |
"step": 2820 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1287, | |
"step": 2825 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.5898, | |
"step": 2830 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1721, | |
"step": 2835 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1643, | |
"step": 2840 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.1917, | |
"step": 2845 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3111, | |
"step": 2850 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.3071, | |
"step": 2855 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.6017, | |
"step": 2860 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.2082, | |
"step": 2865 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 5e-05, | |
"loss": 1.115, | |
"step": 2870 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2392, | |
"step": 2875 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.3031, | |
"step": 2880 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.0163, | |
"step": 2885 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1265, | |
"step": 2890 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1317, | |
"step": 2895 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.222, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.4443, | |
"step": 2905 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.3052, | |
"step": 2910 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1665, | |
"step": 2915 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2298, | |
"step": 2920 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.0718, | |
"step": 2925 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2547, | |
"step": 2930 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.175, | |
"step": 2935 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1845, | |
"step": 2940 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.3121, | |
"step": 2945 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1006, | |
"step": 2950 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1725, | |
"step": 2955 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2875, | |
"step": 2960 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.4183, | |
"step": 2965 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1377, | |
"step": 2970 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 0.9548, | |
"step": 2975 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1467, | |
"step": 2980 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2547, | |
"step": 2985 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.3458, | |
"step": 2990 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.376, | |
"step": 2995 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.3754, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2904, | |
"step": 3005 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1828, | |
"step": 3010 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.2004, | |
"step": 3015 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1844, | |
"step": 3020 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 5e-05, | |
"loss": 1.1612, | |
"step": 3025 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 0.9561, | |
"step": 3030 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2275, | |
"step": 3035 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 0.9808, | |
"step": 3040 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2718, | |
"step": 3045 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1771, | |
"step": 3050 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.3902, | |
"step": 3055 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1982, | |
"step": 3060 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.4785, | |
"step": 3065 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2184, | |
"step": 3070 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.3032, | |
"step": 3075 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1828, | |
"step": 3080 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2848, | |
"step": 3085 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.3645, | |
"step": 3090 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2512, | |
"step": 3095 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2417, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2396, | |
"step": 3105 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.335, | |
"step": 3110 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1056, | |
"step": 3115 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1872, | |
"step": 3120 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.0145, | |
"step": 3125 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.292, | |
"step": 3130 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.0618, | |
"step": 3135 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1003, | |
"step": 3140 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1697, | |
"step": 3145 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2506, | |
"step": 3150 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2888, | |
"step": 3155 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.3322, | |
"step": 3160 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.2993, | |
"step": 3165 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.1092, | |
"step": 3170 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.0593, | |
"step": 3175 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 5e-05, | |
"loss": 1.0538, | |
"step": 3180 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 0.9249, | |
"step": 3185 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1754, | |
"step": 3190 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1277, | |
"step": 3195 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.226, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 0.21, | |
"eval_loss": 1.179598093032837, | |
"eval_runtime": 170.3929, | |
"eval_samples_per_second": 0.376, | |
"eval_steps_per_second": 0.376, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1632, | |
"step": 3205 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.4241, | |
"step": 3210 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.0872, | |
"step": 3215 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.203, | |
"step": 3220 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.0094, | |
"step": 3225 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1493, | |
"step": 3230 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2938, | |
"step": 3235 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1267, | |
"step": 3240 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.0085, | |
"step": 3245 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.3159, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1966, | |
"step": 3255 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.0239, | |
"step": 3260 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2825, | |
"step": 3265 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.163, | |
"step": 3270 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2158, | |
"step": 3275 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.1857, | |
"step": 3280 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2131, | |
"step": 3285 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2768, | |
"step": 3290 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2117, | |
"step": 3295 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.3346, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2581, | |
"step": 3305 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.3054, | |
"step": 3310 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2141, | |
"step": 3315 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.38, | |
"step": 3320 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2808, | |
"step": 3325 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.3421, | |
"step": 3330 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.24, | |
"step": 3335 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 5e-05, | |
"loss": 1.2132, | |
"step": 3340 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.05, | |
"step": 3345 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.0499, | |
"step": 3350 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2259, | |
"step": 3355 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.4174, | |
"step": 3360 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.3429, | |
"step": 3365 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1113, | |
"step": 3370 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2014, | |
"step": 3375 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.0826, | |
"step": 3380 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.044, | |
"step": 3385 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.3054, | |
"step": 3390 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.0382, | |
"step": 3395 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2458, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2246, | |
"step": 3405 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1372, | |
"step": 3410 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.3249, | |
"step": 3415 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2838, | |
"step": 3420 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.0548, | |
"step": 3425 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2292, | |
"step": 3430 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 0.9921, | |
"step": 3435 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2817, | |
"step": 3440 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2422, | |
"step": 3445 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1707, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1564, | |
"step": 3455 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.3985, | |
"step": 3460 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1648, | |
"step": 3465 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 0.9971, | |
"step": 3470 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.0115, | |
"step": 3475 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1464, | |
"step": 3480 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.2196, | |
"step": 3485 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.1282, | |
"step": 3490 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 5e-05, | |
"loss": 1.4565, | |
"step": 3495 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.3163, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.33, | |
"step": 3505 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2985, | |
"step": 3510 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.3182, | |
"step": 3515 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.3114, | |
"step": 3520 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.0984, | |
"step": 3525 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.0306, | |
"step": 3530 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2963, | |
"step": 3535 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.3333, | |
"step": 3540 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2318, | |
"step": 3545 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2929, | |
"step": 3550 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.3573, | |
"step": 3555 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.358, | |
"step": 3560 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1039, | |
"step": 3565 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1091, | |
"step": 3570 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2715, | |
"step": 3575 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 0.8045, | |
"step": 3580 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.0576, | |
"step": 3585 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1391, | |
"step": 3590 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.0483, | |
"step": 3595 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1165, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 0.23, | |
"eval_loss": 1.155808448791504, | |
"eval_runtime": 172.5048, | |
"eval_samples_per_second": 0.371, | |
"eval_steps_per_second": 0.371, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1997, | |
"step": 3605 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1307, | |
"step": 3610 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2842, | |
"step": 3615 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2282, | |
"step": 3620 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1098, | |
"step": 3625 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.2388, | |
"step": 3630 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1147, | |
"step": 3635 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1659, | |
"step": 3640 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1453, | |
"step": 3645 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 5e-05, | |
"loss": 1.1628, | |
"step": 3650 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 0.988, | |
"step": 3655 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1183, | |
"step": 3660 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2033, | |
"step": 3665 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.0712, | |
"step": 3670 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2634, | |
"step": 3675 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.209, | |
"step": 3680 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.04, | |
"step": 3685 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1527, | |
"step": 3690 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.0483, | |
"step": 3695 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1756, | |
"step": 3700 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 0.9585, | |
"step": 3705 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2863, | |
"step": 3710 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.3797, | |
"step": 3715 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1856, | |
"step": 3720 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2615, | |
"step": 3725 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.209, | |
"step": 3730 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2585, | |
"step": 3735 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.237, | |
"step": 3740 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2346, | |
"step": 3745 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.023, | |
"step": 3750 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2877, | |
"step": 3755 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.185, | |
"step": 3760 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.0719, | |
"step": 3765 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1685, | |
"step": 3770 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.151, | |
"step": 3775 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1079, | |
"step": 3780 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 0.9535, | |
"step": 3785 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.0991, | |
"step": 3790 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2744, | |
"step": 3795 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.1761, | |
"step": 3800 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 5e-05, | |
"loss": 1.2275, | |
"step": 3805 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.221, | |
"step": 3810 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 0.9127, | |
"step": 3815 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1615, | |
"step": 3820 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1517, | |
"step": 3825 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0897, | |
"step": 3830 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1075, | |
"step": 3835 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 0.8878, | |
"step": 3840 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.2248, | |
"step": 3845 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.062, | |
"step": 3850 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.205, | |
"step": 3855 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.3044, | |
"step": 3860 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 0.9754, | |
"step": 3865 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0628, | |
"step": 3870 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 0.9701, | |
"step": 3875 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.2846, | |
"step": 3880 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1182, | |
"step": 3885 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 0.9332, | |
"step": 3890 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0809, | |
"step": 3895 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0681, | |
"step": 3900 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0162, | |
"step": 3905 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.118, | |
"step": 3910 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1402, | |
"step": 3915 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.4813, | |
"step": 3920 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.2975, | |
"step": 3925 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0903, | |
"step": 3930 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1279, | |
"step": 3935 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.1641, | |
"step": 3940 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.4793, | |
"step": 3945 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.299, | |
"step": 3950 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.2859, | |
"step": 3955 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 5e-05, | |
"loss": 1.0884, | |
"step": 3960 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.3883, | |
"step": 3965 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 0.8258, | |
"step": 3970 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.0458, | |
"step": 3975 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 0.818, | |
"step": 3980 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.3033, | |
"step": 3985 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.1281, | |
"step": 3990 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.1215, | |
"step": 3995 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 5e-05, | |
"loss": 1.3327, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.26, | |
"eval_loss": 1.118335485458374, | |
"eval_runtime": 172.4548, | |
"eval_samples_per_second": 0.371, | |
"eval_steps_per_second": 0.371, | |
"step": 4000 | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment