|
{ |
|
"best_global_step": 3000, |
|
"best_metric": 0.9244844913482666, |
|
"best_model_checkpoint": "/workspace/runs_coder_noed_10/checkpoint-3000", |
|
"epoch": 0.7334963325183375, |
|
"eval_steps": 1000, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0024449877750611247, |
|
"grad_norm": 0.7603356838226318, |
|
"learning_rate": 9.988997555012225e-06, |
|
"loss": 2.0597, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004889975550122249, |
|
"grad_norm": 0.491472065448761, |
|
"learning_rate": 9.97677261613692e-06, |
|
"loss": 1.8106, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007334963325183374, |
|
"grad_norm": 0.421262264251709, |
|
"learning_rate": 9.964547677261615e-06, |
|
"loss": 1.6648, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009779951100244499, |
|
"grad_norm": 0.3425203263759613, |
|
"learning_rate": 9.95232273838631e-06, |
|
"loss": 1.552, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012224938875305624, |
|
"grad_norm": 0.3278200328350067, |
|
"learning_rate": 9.940097799511004e-06, |
|
"loss": 1.4701, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014669926650366748, |
|
"grad_norm": 0.3467581868171692, |
|
"learning_rate": 9.927872860635697e-06, |
|
"loss": 1.3899, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.017114914425427872, |
|
"grad_norm": 0.3279109001159668, |
|
"learning_rate": 9.915647921760392e-06, |
|
"loss": 1.3596, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019559902200488997, |
|
"grad_norm": 0.3338731825351715, |
|
"learning_rate": 9.903422982885086e-06, |
|
"loss": 1.335, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022004889975550123, |
|
"grad_norm": 0.39886194467544556, |
|
"learning_rate": 9.891198044009781e-06, |
|
"loss": 1.3055, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02444987775061125, |
|
"grad_norm": 0.3964369297027588, |
|
"learning_rate": 9.878973105134476e-06, |
|
"loss": 1.2911, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02689486552567237, |
|
"grad_norm": 0.42521995306015015, |
|
"learning_rate": 9.866748166259169e-06, |
|
"loss": 1.2764, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.029339853300733496, |
|
"grad_norm": 0.40386906266212463, |
|
"learning_rate": 9.854523227383865e-06, |
|
"loss": 1.2573, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03178484107579462, |
|
"grad_norm": 0.42052966356277466, |
|
"learning_rate": 9.842298288508558e-06, |
|
"loss": 1.2147, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.034229828850855744, |
|
"grad_norm": 0.4453684091567993, |
|
"learning_rate": 9.830073349633253e-06, |
|
"loss": 1.2383, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03667481662591687, |
|
"grad_norm": 0.4648647606372833, |
|
"learning_rate": 9.817848410757947e-06, |
|
"loss": 1.2257, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.039119804400977995, |
|
"grad_norm": 0.4787471890449524, |
|
"learning_rate": 9.805623471882642e-06, |
|
"loss": 1.1803, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04156479217603912, |
|
"grad_norm": 0.5450437068939209, |
|
"learning_rate": 9.793398533007335e-06, |
|
"loss": 1.1454, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.044009779951100246, |
|
"grad_norm": 0.4808744192123413, |
|
"learning_rate": 9.78117359413203e-06, |
|
"loss": 1.1936, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04645476772616137, |
|
"grad_norm": 0.48509007692337036, |
|
"learning_rate": 9.768948655256724e-06, |
|
"loss": 1.1674, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0488997555012225, |
|
"grad_norm": 0.5013049840927124, |
|
"learning_rate": 9.756723716381419e-06, |
|
"loss": 1.1691, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05134474327628362, |
|
"grad_norm": 0.5472636818885803, |
|
"learning_rate": 9.744498777506112e-06, |
|
"loss": 1.1596, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05378973105134474, |
|
"grad_norm": 0.5386976599693298, |
|
"learning_rate": 9.732273838630808e-06, |
|
"loss": 1.1449, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05623471882640587, |
|
"grad_norm": 0.5038230419158936, |
|
"learning_rate": 9.720048899755501e-06, |
|
"loss": 1.1029, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05867970660146699, |
|
"grad_norm": 0.5687288045883179, |
|
"learning_rate": 9.707823960880196e-06, |
|
"loss": 1.1463, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.061124694376528114, |
|
"grad_norm": 0.53361576795578, |
|
"learning_rate": 9.69559902200489e-06, |
|
"loss": 1.1289, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06356968215158924, |
|
"grad_norm": 0.5407126545906067, |
|
"learning_rate": 9.683374083129585e-06, |
|
"loss": 1.0949, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06601466992665037, |
|
"grad_norm": 0.6220566630363464, |
|
"learning_rate": 9.67114914425428e-06, |
|
"loss": 1.1131, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06845965770171149, |
|
"grad_norm": 0.5715101361274719, |
|
"learning_rate": 9.658924205378973e-06, |
|
"loss": 1.108, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07090464547677261, |
|
"grad_norm": 0.5447026491165161, |
|
"learning_rate": 9.646699266503668e-06, |
|
"loss": 1.0953, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07334963325183375, |
|
"grad_norm": 0.607760488986969, |
|
"learning_rate": 9.634474327628362e-06, |
|
"loss": 1.0689, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07579462102689487, |
|
"grad_norm": 0.5735874176025391, |
|
"learning_rate": 9.622249388753057e-06, |
|
"loss": 1.0704, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07823960880195599, |
|
"grad_norm": 0.6558325886726379, |
|
"learning_rate": 9.610024449877752e-06, |
|
"loss": 1.0834, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08068459657701711, |
|
"grad_norm": 0.644957959651947, |
|
"learning_rate": 9.597799511002447e-06, |
|
"loss": 1.1016, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08312958435207823, |
|
"grad_norm": 0.4896561801433563, |
|
"learning_rate": 9.58557457212714e-06, |
|
"loss": 1.0945, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08557457212713937, |
|
"grad_norm": 0.5884416699409485, |
|
"learning_rate": 9.573349633251834e-06, |
|
"loss": 1.1114, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08801955990220049, |
|
"grad_norm": 0.6569796204566956, |
|
"learning_rate": 9.561124694376529e-06, |
|
"loss": 1.1083, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09046454767726161, |
|
"grad_norm": 0.6272417306900024, |
|
"learning_rate": 9.548899755501224e-06, |
|
"loss": 1.0743, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09290953545232274, |
|
"grad_norm": 0.5897823572158813, |
|
"learning_rate": 9.536674816625916e-06, |
|
"loss": 1.0434, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09535452322738386, |
|
"grad_norm": 0.5946202278137207, |
|
"learning_rate": 9.524449877750613e-06, |
|
"loss": 1.0474, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.097799511002445, |
|
"grad_norm": 0.5851741433143616, |
|
"learning_rate": 9.512224938875306e-06, |
|
"loss": 1.0697, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10024449877750612, |
|
"grad_norm": 0.7547958493232727, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.0767, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10268948655256724, |
|
"grad_norm": 0.651896595954895, |
|
"learning_rate": 9.487775061124695e-06, |
|
"loss": 1.0853, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10513447432762836, |
|
"grad_norm": 0.6152411103248596, |
|
"learning_rate": 9.47555012224939e-06, |
|
"loss": 1.0582, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10757946210268948, |
|
"grad_norm": 0.707961916923523, |
|
"learning_rate": 9.463325183374083e-06, |
|
"loss": 1.0658, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1100244498777506, |
|
"grad_norm": 0.6049492359161377, |
|
"learning_rate": 9.45110024449878e-06, |
|
"loss": 1.0687, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11246943765281174, |
|
"grad_norm": 0.6512130498886108, |
|
"learning_rate": 9.438875305623472e-06, |
|
"loss": 1.0574, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11491442542787286, |
|
"grad_norm": 0.6922876834869385, |
|
"learning_rate": 9.426650366748167e-06, |
|
"loss": 1.0311, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11735941320293398, |
|
"grad_norm": 0.6023642420768738, |
|
"learning_rate": 9.414425427872862e-06, |
|
"loss": 1.0098, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1198044009779951, |
|
"grad_norm": 0.6482692360877991, |
|
"learning_rate": 9.402200488997556e-06, |
|
"loss": 1.0537, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12224938875305623, |
|
"grad_norm": 0.6799217462539673, |
|
"learning_rate": 9.38997555012225e-06, |
|
"loss": 1.0474, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12469437652811736, |
|
"grad_norm": 0.7018452286720276, |
|
"learning_rate": 9.377750611246944e-06, |
|
"loss": 1.0273, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1271393643031785, |
|
"grad_norm": 0.7044438123703003, |
|
"learning_rate": 9.365525672371639e-06, |
|
"loss": 1.0305, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1295843520782396, |
|
"grad_norm": 0.7802556157112122, |
|
"learning_rate": 9.353300733496333e-06, |
|
"loss": 1.0758, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13202933985330073, |
|
"grad_norm": 0.6845948100090027, |
|
"learning_rate": 9.341075794621028e-06, |
|
"loss": 1.0158, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13447432762836187, |
|
"grad_norm": 0.7554173469543457, |
|
"learning_rate": 9.328850855745723e-06, |
|
"loss": 1.0307, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13691931540342298, |
|
"grad_norm": 0.7574878334999084, |
|
"learning_rate": 9.316625916870417e-06, |
|
"loss": 0.9949, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1393643031784841, |
|
"grad_norm": 0.6939298510551453, |
|
"learning_rate": 9.30440097799511e-06, |
|
"loss": 1.041, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14180929095354522, |
|
"grad_norm": 0.6318020224571228, |
|
"learning_rate": 9.292176039119805e-06, |
|
"loss": 1.032, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14425427872860636, |
|
"grad_norm": 0.7026094794273376, |
|
"learning_rate": 9.2799511002445e-06, |
|
"loss": 1.0642, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1466992665036675, |
|
"grad_norm": 0.6520543694496155, |
|
"learning_rate": 9.267726161369194e-06, |
|
"loss": 1.0535, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1491442542787286, |
|
"grad_norm": 0.6793085336685181, |
|
"learning_rate": 9.255501222493887e-06, |
|
"loss": 1.0447, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15158924205378974, |
|
"grad_norm": 0.6935579776763916, |
|
"learning_rate": 9.243276283618584e-06, |
|
"loss": 1.0151, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15403422982885084, |
|
"grad_norm": 0.767677903175354, |
|
"learning_rate": 9.231051344743277e-06, |
|
"loss": 0.9973, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15647921760391198, |
|
"grad_norm": 0.7972632646560669, |
|
"learning_rate": 9.218826405867971e-06, |
|
"loss": 0.9839, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15892420537897312, |
|
"grad_norm": 0.716218113899231, |
|
"learning_rate": 9.206601466992666e-06, |
|
"loss": 0.9674, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16136919315403422, |
|
"grad_norm": 0.6637817621231079, |
|
"learning_rate": 9.19437652811736e-06, |
|
"loss": 1.0171, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16381418092909536, |
|
"grad_norm": 0.6555060148239136, |
|
"learning_rate": 9.182151589242054e-06, |
|
"loss": 1.0037, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16625916870415647, |
|
"grad_norm": 0.7460082769393921, |
|
"learning_rate": 9.16992665036675e-06, |
|
"loss": 1.0329, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1687041564792176, |
|
"grad_norm": 0.6652795672416687, |
|
"learning_rate": 9.157701711491443e-06, |
|
"loss": 1.041, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17114914425427874, |
|
"grad_norm": 0.6055278182029724, |
|
"learning_rate": 9.145476772616138e-06, |
|
"loss": 1.0152, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17359413202933985, |
|
"grad_norm": 0.7681952118873596, |
|
"learning_rate": 9.133251833740832e-06, |
|
"loss": 1.0292, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.17603911980440098, |
|
"grad_norm": 0.8185127973556519, |
|
"learning_rate": 9.121026894865527e-06, |
|
"loss": 1.002, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1784841075794621, |
|
"grad_norm": 0.6619365811347961, |
|
"learning_rate": 9.10880195599022e-06, |
|
"loss": 1.0032, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18092909535452323, |
|
"grad_norm": 0.6229087710380554, |
|
"learning_rate": 9.096577017114915e-06, |
|
"loss": 1.0107, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18337408312958436, |
|
"grad_norm": 0.8737857937812805, |
|
"learning_rate": 9.08435207823961e-06, |
|
"loss": 1.0276, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18581907090464547, |
|
"grad_norm": 0.7398986220359802, |
|
"learning_rate": 9.072127139364304e-06, |
|
"loss": 1.0185, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1882640586797066, |
|
"grad_norm": 0.662419855594635, |
|
"learning_rate": 9.059902200488999e-06, |
|
"loss": 1.0028, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19070904645476772, |
|
"grad_norm": 0.7412256002426147, |
|
"learning_rate": 9.047677261613693e-06, |
|
"loss": 1.0102, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19315403422982885, |
|
"grad_norm": 0.7194265127182007, |
|
"learning_rate": 9.035452322738388e-06, |
|
"loss": 1.0024, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19559902200489, |
|
"grad_norm": 0.6712033152580261, |
|
"learning_rate": 9.023227383863081e-06, |
|
"loss": 0.981, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1980440097799511, |
|
"grad_norm": 0.7297273278236389, |
|
"learning_rate": 9.011002444987776e-06, |
|
"loss": 1.0087, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.20048899755501223, |
|
"grad_norm": 0.7646290063858032, |
|
"learning_rate": 8.99877750611247e-06, |
|
"loss": 1.008, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.20293398533007334, |
|
"grad_norm": 0.6914790272712708, |
|
"learning_rate": 8.986552567237165e-06, |
|
"loss": 0.9967, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20537897310513448, |
|
"grad_norm": 0.7169461250305176, |
|
"learning_rate": 8.974327628361858e-06, |
|
"loss": 0.9945, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2078239608801956, |
|
"grad_norm": 0.686245322227478, |
|
"learning_rate": 8.962102689486554e-06, |
|
"loss": 1.003, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21026894865525672, |
|
"grad_norm": 0.7075187563896179, |
|
"learning_rate": 8.949877750611247e-06, |
|
"loss": 0.9875, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.21271393643031786, |
|
"grad_norm": 0.8134737610816956, |
|
"learning_rate": 8.937652811735942e-06, |
|
"loss": 1.0068, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21515892420537897, |
|
"grad_norm": 0.6891161799430847, |
|
"learning_rate": 8.925427872860637e-06, |
|
"loss": 1.0258, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2176039119804401, |
|
"grad_norm": 0.8024953007698059, |
|
"learning_rate": 8.913202933985331e-06, |
|
"loss": 1.0199, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2200488997555012, |
|
"grad_norm": 0.6608093976974487, |
|
"learning_rate": 8.900977995110024e-06, |
|
"loss": 1.0042, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.22249388753056235, |
|
"grad_norm": 0.7665097117424011, |
|
"learning_rate": 8.888753056234719e-06, |
|
"loss": 1.0004, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22493887530562348, |
|
"grad_norm": 0.7506141662597656, |
|
"learning_rate": 8.876528117359414e-06, |
|
"loss": 0.9913, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2273838630806846, |
|
"grad_norm": 0.8530935049057007, |
|
"learning_rate": 8.864303178484108e-06, |
|
"loss": 1.0061, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.22982885085574573, |
|
"grad_norm": 0.7526981830596924, |
|
"learning_rate": 8.852078239608803e-06, |
|
"loss": 1.0343, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.23227383863080683, |
|
"grad_norm": 0.6752293109893799, |
|
"learning_rate": 8.839853300733498e-06, |
|
"loss": 1.0351, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23471882640586797, |
|
"grad_norm": 0.7808672189712524, |
|
"learning_rate": 8.82762836185819e-06, |
|
"loss": 1.0216, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.2371638141809291, |
|
"grad_norm": 0.7220605611801147, |
|
"learning_rate": 8.815403422982885e-06, |
|
"loss": 1.0066, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2396088019559902, |
|
"grad_norm": 0.777722954750061, |
|
"learning_rate": 8.80317848410758e-06, |
|
"loss": 0.9936, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.24205378973105135, |
|
"grad_norm": 0.7381393313407898, |
|
"learning_rate": 8.790953545232275e-06, |
|
"loss": 0.9982, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.24449877750611246, |
|
"grad_norm": 0.6590691804885864, |
|
"learning_rate": 8.77872860635697e-06, |
|
"loss": 0.9788, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24449877750611246, |
|
"eval_loss": 0.9873583912849426, |
|
"eval_runtime": 795.9609, |
|
"eval_samples_per_second": 18.27, |
|
"eval_steps_per_second": 0.572, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2469437652811736, |
|
"grad_norm": 0.851658284664154, |
|
"learning_rate": 8.766503667481662e-06, |
|
"loss": 0.9715, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24938875305623473, |
|
"grad_norm": 0.6827694177627563, |
|
"learning_rate": 8.754278728606359e-06, |
|
"loss": 0.9521, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.25183374083129584, |
|
"grad_norm": 0.7433952689170837, |
|
"learning_rate": 8.742053789731052e-06, |
|
"loss": 1.0064, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.254278728606357, |
|
"grad_norm": 0.7873063087463379, |
|
"learning_rate": 8.729828850855746e-06, |
|
"loss": 0.997, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2567237163814181, |
|
"grad_norm": 0.7808490991592407, |
|
"learning_rate": 8.717603911980441e-06, |
|
"loss": 1.021, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2591687041564792, |
|
"grad_norm": 0.7002695798873901, |
|
"learning_rate": 8.705378973105136e-06, |
|
"loss": 0.9787, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2616136919315403, |
|
"grad_norm": 0.8023979663848877, |
|
"learning_rate": 8.693154034229829e-06, |
|
"loss": 0.9913, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.26405867970660146, |
|
"grad_norm": 0.6559330224990845, |
|
"learning_rate": 8.680929095354525e-06, |
|
"loss": 0.9755, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2665036674816626, |
|
"grad_norm": 0.703667402267456, |
|
"learning_rate": 8.668704156479218e-06, |
|
"loss": 1.0047, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.26894865525672373, |
|
"grad_norm": 0.7463963627815247, |
|
"learning_rate": 8.656479217603913e-06, |
|
"loss": 0.9607, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2713936430317848, |
|
"grad_norm": 0.8031372427940369, |
|
"learning_rate": 8.644254278728606e-06, |
|
"loss": 0.9835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.27383863080684595, |
|
"grad_norm": 0.7800993919372559, |
|
"learning_rate": 8.632029339853302e-06, |
|
"loss": 0.9651, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2762836185819071, |
|
"grad_norm": 0.8147879242897034, |
|
"learning_rate": 8.619804400977995e-06, |
|
"loss": 1.029, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2787286063569682, |
|
"grad_norm": 0.6160231828689575, |
|
"learning_rate": 8.60757946210269e-06, |
|
"loss": 0.9771, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.28117359413202936, |
|
"grad_norm": 0.8735865354537964, |
|
"learning_rate": 8.595354523227385e-06, |
|
"loss": 0.9719, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.28361858190709044, |
|
"grad_norm": 0.6970623135566711, |
|
"learning_rate": 8.58312958435208e-06, |
|
"loss": 0.9948, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2860635696821516, |
|
"grad_norm": 0.7240091562271118, |
|
"learning_rate": 8.570904645476774e-06, |
|
"loss": 0.9798, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2885085574572127, |
|
"grad_norm": 0.8112177848815918, |
|
"learning_rate": 8.558679706601469e-06, |
|
"loss": 0.9857, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.29095354523227385, |
|
"grad_norm": 0.7607941031455994, |
|
"learning_rate": 8.546454767726162e-06, |
|
"loss": 0.9467, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.293398533007335, |
|
"grad_norm": 0.7950695753097534, |
|
"learning_rate": 8.534229828850856e-06, |
|
"loss": 0.9691, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29584352078239606, |
|
"grad_norm": 0.7780068516731262, |
|
"learning_rate": 8.522004889975551e-06, |
|
"loss": 0.9658, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2982885085574572, |
|
"grad_norm": 0.7490630149841309, |
|
"learning_rate": 8.509779951100246e-06, |
|
"loss": 0.9528, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.30073349633251834, |
|
"grad_norm": 0.7754825353622437, |
|
"learning_rate": 8.49755501222494e-06, |
|
"loss": 0.995, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.30317848410757947, |
|
"grad_norm": 0.7797338366508484, |
|
"learning_rate": 8.485330073349633e-06, |
|
"loss": 0.9976, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3056234718826406, |
|
"grad_norm": 0.7192637920379639, |
|
"learning_rate": 8.473105134474328e-06, |
|
"loss": 0.9764, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3080684596577017, |
|
"grad_norm": 0.7009165287017822, |
|
"learning_rate": 8.460880195599023e-06, |
|
"loss": 0.9794, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3105134474327628, |
|
"grad_norm": 0.7941039204597473, |
|
"learning_rate": 8.448655256723717e-06, |
|
"loss": 0.9701, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.31295843520782396, |
|
"grad_norm": 0.7658547163009644, |
|
"learning_rate": 8.436430317848412e-06, |
|
"loss": 0.9658, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3154034229828851, |
|
"grad_norm": 0.8190957903862, |
|
"learning_rate": 8.424205378973107e-06, |
|
"loss": 0.9566, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31784841075794623, |
|
"grad_norm": 0.7688964605331421, |
|
"learning_rate": 8.4119804400978e-06, |
|
"loss": 0.9743, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3202933985330073, |
|
"grad_norm": 0.7160921096801758, |
|
"learning_rate": 8.399755501222494e-06, |
|
"loss": 0.9363, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.32273838630806845, |
|
"grad_norm": 0.6877838373184204, |
|
"learning_rate": 8.387530562347189e-06, |
|
"loss": 0.9704, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3251833740831296, |
|
"grad_norm": 0.7970196008682251, |
|
"learning_rate": 8.375305623471884e-06, |
|
"loss": 0.9824, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3276283618581907, |
|
"grad_norm": 0.7897329926490784, |
|
"learning_rate": 8.363080684596577e-06, |
|
"loss": 0.971, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.33007334963325186, |
|
"grad_norm": 0.7263765931129456, |
|
"learning_rate": 8.350855745721273e-06, |
|
"loss": 0.9754, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.33251833740831294, |
|
"grad_norm": 0.6933903098106384, |
|
"learning_rate": 8.338630806845966e-06, |
|
"loss": 0.9451, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.33496332518337407, |
|
"grad_norm": 0.6975806951522827, |
|
"learning_rate": 8.32640586797066e-06, |
|
"loss": 0.9538, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3374083129584352, |
|
"grad_norm": 0.8183455467224121, |
|
"learning_rate": 8.314180929095355e-06, |
|
"loss": 0.955, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.33985330073349634, |
|
"grad_norm": 0.8085072636604309, |
|
"learning_rate": 8.30195599022005e-06, |
|
"loss": 0.9337, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.3422982885085575, |
|
"grad_norm": 0.7899576425552368, |
|
"learning_rate": 8.289731051344743e-06, |
|
"loss": 0.984, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.34474327628361856, |
|
"grad_norm": 0.8049628734588623, |
|
"learning_rate": 8.277506112469438e-06, |
|
"loss": 0.967, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3471882640586797, |
|
"grad_norm": 0.7915717959403992, |
|
"learning_rate": 8.265281173594132e-06, |
|
"loss": 0.9379, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.34963325183374083, |
|
"grad_norm": 0.7397669553756714, |
|
"learning_rate": 8.253056234718827e-06, |
|
"loss": 0.9771, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.35207823960880197, |
|
"grad_norm": 0.7894385457038879, |
|
"learning_rate": 8.240831295843522e-06, |
|
"loss": 0.9647, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3545232273838631, |
|
"grad_norm": 0.7507162094116211, |
|
"learning_rate": 8.228606356968216e-06, |
|
"loss": 0.9783, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3569682151589242, |
|
"grad_norm": 0.8646982908248901, |
|
"learning_rate": 8.216381418092911e-06, |
|
"loss": 0.9817, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3594132029339853, |
|
"grad_norm": 0.7320569753646851, |
|
"learning_rate": 8.204156479217604e-06, |
|
"loss": 0.9502, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.36185819070904646, |
|
"grad_norm": 0.7949222922325134, |
|
"learning_rate": 8.191931540342299e-06, |
|
"loss": 0.9694, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3643031784841076, |
|
"grad_norm": 0.8175340294837952, |
|
"learning_rate": 8.179706601466993e-06, |
|
"loss": 0.972, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.36674816625916873, |
|
"grad_norm": 0.8479053974151611, |
|
"learning_rate": 8.167481662591688e-06, |
|
"loss": 1.0059, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3691931540342298, |
|
"grad_norm": 0.7485557794570923, |
|
"learning_rate": 8.155256723716381e-06, |
|
"loss": 0.9303, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.37163814180929094, |
|
"grad_norm": 0.7518207430839539, |
|
"learning_rate": 8.143031784841077e-06, |
|
"loss": 0.9572, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3740831295843521, |
|
"grad_norm": 0.8368051052093506, |
|
"learning_rate": 8.13080684596577e-06, |
|
"loss": 0.9865, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3765281173594132, |
|
"grad_norm": 0.8675082325935364, |
|
"learning_rate": 8.118581907090465e-06, |
|
"loss": 0.9355, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.37897310513447435, |
|
"grad_norm": 0.8239061832427979, |
|
"learning_rate": 8.10635696821516e-06, |
|
"loss": 0.9475, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.38141809290953543, |
|
"grad_norm": 0.700191080570221, |
|
"learning_rate": 8.094132029339854e-06, |
|
"loss": 0.9343, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.38386308068459657, |
|
"grad_norm": 0.789686381816864, |
|
"learning_rate": 8.081907090464547e-06, |
|
"loss": 0.9633, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3863080684596577, |
|
"grad_norm": 0.8411287665367126, |
|
"learning_rate": 8.069682151589244e-06, |
|
"loss": 0.991, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.38875305623471884, |
|
"grad_norm": 0.7479956150054932, |
|
"learning_rate": 8.057457212713937e-06, |
|
"loss": 0.9371, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.39119804400978, |
|
"grad_norm": 0.726841151714325, |
|
"learning_rate": 8.045232273838631e-06, |
|
"loss": 0.9595, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.39364303178484106, |
|
"grad_norm": 0.7860400676727295, |
|
"learning_rate": 8.033007334963326e-06, |
|
"loss": 0.9673, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3960880195599022, |
|
"grad_norm": 0.8316232562065125, |
|
"learning_rate": 8.02078239608802e-06, |
|
"loss": 0.9592, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39853300733496333, |
|
"grad_norm": 0.743289053440094, |
|
"learning_rate": 8.008557457212714e-06, |
|
"loss": 0.9424, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.40097799511002447, |
|
"grad_norm": 0.7066758275032043, |
|
"learning_rate": 7.996332518337408e-06, |
|
"loss": 0.9515, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4034229828850856, |
|
"grad_norm": 0.8527940511703491, |
|
"learning_rate": 7.984107579462103e-06, |
|
"loss": 0.9579, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4058679706601467, |
|
"grad_norm": 0.747097909450531, |
|
"learning_rate": 7.971882640586798e-06, |
|
"loss": 0.96, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4083129584352078, |
|
"grad_norm": 0.7440111637115479, |
|
"learning_rate": 7.959657701711492e-06, |
|
"loss": 0.9428, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.41075794621026895, |
|
"grad_norm": 0.7893000245094299, |
|
"learning_rate": 7.947432762836187e-06, |
|
"loss": 0.9775, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4132029339853301, |
|
"grad_norm": 0.7849116325378418, |
|
"learning_rate": 7.935207823960882e-06, |
|
"loss": 0.9636, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4156479217603912, |
|
"grad_norm": 0.8839460611343384, |
|
"learning_rate": 7.922982885085575e-06, |
|
"loss": 0.9427, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4180929095354523, |
|
"grad_norm": 0.7875691056251526, |
|
"learning_rate": 7.91075794621027e-06, |
|
"loss": 0.9188, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.42053789731051344, |
|
"grad_norm": 0.7899657487869263, |
|
"learning_rate": 7.898533007334964e-06, |
|
"loss": 0.9337, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4229828850855746, |
|
"grad_norm": 0.8064518570899963, |
|
"learning_rate": 7.886308068459659e-06, |
|
"loss": 0.9473, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4254278728606357, |
|
"grad_norm": 0.9735845327377319, |
|
"learning_rate": 7.874083129584352e-06, |
|
"loss": 0.943, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4278728606356968, |
|
"grad_norm": 0.8656879663467407, |
|
"learning_rate": 7.861858190709048e-06, |
|
"loss": 0.9276, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.43031784841075793, |
|
"grad_norm": 0.7446141242980957, |
|
"learning_rate": 7.849633251833741e-06, |
|
"loss": 0.9609, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.43276283618581907, |
|
"grad_norm": 0.7805430889129639, |
|
"learning_rate": 7.837408312958436e-06, |
|
"loss": 0.9437, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4352078239608802, |
|
"grad_norm": 0.8328519463539124, |
|
"learning_rate": 7.82518337408313e-06, |
|
"loss": 0.9921, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.43765281173594134, |
|
"grad_norm": 0.7910134196281433, |
|
"learning_rate": 7.812958435207825e-06, |
|
"loss": 0.9392, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4400977995110024, |
|
"grad_norm": 0.8082761168479919, |
|
"learning_rate": 7.800733496332518e-06, |
|
"loss": 0.9655, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.44254278728606355, |
|
"grad_norm": 0.7104289531707764, |
|
"learning_rate": 7.788508557457214e-06, |
|
"loss": 0.9604, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4449877750611247, |
|
"grad_norm": 0.7942298054695129, |
|
"learning_rate": 7.776283618581907e-06, |
|
"loss": 0.9839, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4474327628361858, |
|
"grad_norm": 0.7665939927101135, |
|
"learning_rate": 7.764058679706602e-06, |
|
"loss": 0.9907, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44987775061124696, |
|
"grad_norm": 0.8066325187683105, |
|
"learning_rate": 7.751833740831297e-06, |
|
"loss": 0.9503, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.45232273838630804, |
|
"grad_norm": 0.7494056224822998, |
|
"learning_rate": 7.739608801955991e-06, |
|
"loss": 0.9594, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4547677261613692, |
|
"grad_norm": 0.7743037939071655, |
|
"learning_rate": 7.727383863080684e-06, |
|
"loss": 0.9708, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4572127139364303, |
|
"grad_norm": 0.6371968388557434, |
|
"learning_rate": 7.715158924205379e-06, |
|
"loss": 0.9247, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.45965770171149145, |
|
"grad_norm": 0.8373169898986816, |
|
"learning_rate": 7.702933985330074e-06, |
|
"loss": 0.9631, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4621026894865526, |
|
"grad_norm": 0.898855984210968, |
|
"learning_rate": 7.690709046454768e-06, |
|
"loss": 0.9598, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.46454767726161367, |
|
"grad_norm": 0.7827709317207336, |
|
"learning_rate": 7.678484107579463e-06, |
|
"loss": 0.9407, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4669926650366748, |
|
"grad_norm": 0.8428008556365967, |
|
"learning_rate": 7.666259168704158e-06, |
|
"loss": 0.9753, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46943765281173594, |
|
"grad_norm": 0.7905760407447815, |
|
"learning_rate": 7.654034229828853e-06, |
|
"loss": 0.9487, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4718826405867971, |
|
"grad_norm": 0.7715455293655396, |
|
"learning_rate": 7.641809290953546e-06, |
|
"loss": 0.9304, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4743276283618582, |
|
"grad_norm": 0.7903933525085449, |
|
"learning_rate": 7.62958435207824e-06, |
|
"loss": 0.9277, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4767726161369193, |
|
"grad_norm": 0.7191179990768433, |
|
"learning_rate": 7.617359413202935e-06, |
|
"loss": 0.915, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4792176039119804, |
|
"grad_norm": 0.8574967980384827, |
|
"learning_rate": 7.605134474327629e-06, |
|
"loss": 0.918, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.48166259168704156, |
|
"grad_norm": 0.8661892414093018, |
|
"learning_rate": 7.592909535452323e-06, |
|
"loss": 0.9658, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4841075794621027, |
|
"grad_norm": 0.8399495482444763, |
|
"learning_rate": 7.580684596577018e-06, |
|
"loss": 0.9611, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.48655256723716384, |
|
"grad_norm": 0.8675727844238281, |
|
"learning_rate": 7.568459657701712e-06, |
|
"loss": 0.9367, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4889975550122249, |
|
"grad_norm": 0.8307384252548218, |
|
"learning_rate": 7.5562347188264065e-06, |
|
"loss": 0.9628, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4889975550122249, |
|
"eval_loss": 0.9446731805801392, |
|
"eval_runtime": 795.9747, |
|
"eval_samples_per_second": 18.269, |
|
"eval_steps_per_second": 0.572, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49144254278728605, |
|
"grad_norm": 0.7585815191268921, |
|
"learning_rate": 7.544009779951101e-06, |
|
"loss": 0.9499, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4938875305623472, |
|
"grad_norm": 0.870883584022522, |
|
"learning_rate": 7.531784841075795e-06, |
|
"loss": 0.9758, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4963325183374083, |
|
"grad_norm": 0.7365143895149231, |
|
"learning_rate": 7.51955990220049e-06, |
|
"loss": 0.9429, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.49877750611246946, |
|
"grad_norm": 0.9007924199104309, |
|
"learning_rate": 7.5073349633251836e-06, |
|
"loss": 0.9208, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5012224938875306, |
|
"grad_norm": 0.8040947914123535, |
|
"learning_rate": 7.495110024449879e-06, |
|
"loss": 0.931, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5036674816625917, |
|
"grad_norm": 0.7994057536125183, |
|
"learning_rate": 7.482885085574573e-06, |
|
"loss": 0.9406, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5061124694376528, |
|
"grad_norm": 0.7404657006263733, |
|
"learning_rate": 7.470660146699267e-06, |
|
"loss": 0.91, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.508557457212714, |
|
"grad_norm": 0.8444405794143677, |
|
"learning_rate": 7.458435207823962e-06, |
|
"loss": 0.9748, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.511002444987775, |
|
"grad_norm": 0.830786406993866, |
|
"learning_rate": 7.446210268948656e-06, |
|
"loss": 0.9546, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5134474327628362, |
|
"grad_norm": 0.8706929683685303, |
|
"learning_rate": 7.43398533007335e-06, |
|
"loss": 0.9359, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5158924205378973, |
|
"grad_norm": 0.937125563621521, |
|
"learning_rate": 7.4217603911980454e-06, |
|
"loss": 0.9367, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5183374083129584, |
|
"grad_norm": 0.871612548828125, |
|
"learning_rate": 7.409535452322739e-06, |
|
"loss": 0.9363, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5207823960880196, |
|
"grad_norm": 0.7397099733352661, |
|
"learning_rate": 7.397310513447433e-06, |
|
"loss": 0.9312, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5232273838630807, |
|
"grad_norm": 0.9171463847160339, |
|
"learning_rate": 7.385085574572127e-06, |
|
"loss": 0.9657, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5256723716381418, |
|
"grad_norm": 0.7274787425994873, |
|
"learning_rate": 7.3728606356968224e-06, |
|
"loss": 0.9359, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5281173594132029, |
|
"grad_norm": 0.8870149850845337, |
|
"learning_rate": 7.360635696821516e-06, |
|
"loss": 0.9469, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.530562347188264, |
|
"grad_norm": 0.770986020565033, |
|
"learning_rate": 7.34841075794621e-06, |
|
"loss": 0.9529, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5330073349633252, |
|
"grad_norm": 0.7791648507118225, |
|
"learning_rate": 7.336185819070906e-06, |
|
"loss": 0.9509, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5354523227383863, |
|
"grad_norm": 0.8232088088989258, |
|
"learning_rate": 7.3239608801955995e-06, |
|
"loss": 0.9639, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5378973105134475, |
|
"grad_norm": 0.8500565886497498, |
|
"learning_rate": 7.311735941320294e-06, |
|
"loss": 0.97, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5403422982885085, |
|
"grad_norm": 0.8228991627693176, |
|
"learning_rate": 7.299511002444989e-06, |
|
"loss": 0.9514, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5427872860635696, |
|
"grad_norm": 0.7978509068489075, |
|
"learning_rate": 7.287286063569683e-06, |
|
"loss": 0.9599, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5452322738386308, |
|
"grad_norm": 0.7359360456466675, |
|
"learning_rate": 7.275061124694377e-06, |
|
"loss": 0.9087, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5476772616136919, |
|
"grad_norm": 0.8995153903961182, |
|
"learning_rate": 7.262836185819071e-06, |
|
"loss": 0.9082, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5501222493887531, |
|
"grad_norm": 0.7778546214103699, |
|
"learning_rate": 7.250611246943766e-06, |
|
"loss": 0.9599, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5525672371638142, |
|
"grad_norm": 0.7799263596534729, |
|
"learning_rate": 7.2383863080684605e-06, |
|
"loss": 0.895, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5550122249388753, |
|
"grad_norm": 0.831047534942627, |
|
"learning_rate": 7.226161369193154e-06, |
|
"loss": 0.9426, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5574572127139364, |
|
"grad_norm": 0.863277792930603, |
|
"learning_rate": 7.213936430317849e-06, |
|
"loss": 0.947, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5599022004889975, |
|
"grad_norm": 0.7998844981193542, |
|
"learning_rate": 7.201711491442544e-06, |
|
"loss": 0.9366, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5623471882640587, |
|
"grad_norm": 0.818229079246521, |
|
"learning_rate": 7.1894865525672375e-06, |
|
"loss": 0.9524, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5647921760391198, |
|
"grad_norm": 0.8541170954704285, |
|
"learning_rate": 7.177261613691933e-06, |
|
"loss": 0.9297, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5672371638141809, |
|
"grad_norm": 0.7943381071090698, |
|
"learning_rate": 7.165036674816627e-06, |
|
"loss": 0.9186, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5696821515892421, |
|
"grad_norm": 0.886789858341217, |
|
"learning_rate": 7.152811735941321e-06, |
|
"loss": 0.9551, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5721271393643031, |
|
"grad_norm": 0.7614038586616516, |
|
"learning_rate": 7.1405867970660145e-06, |
|
"loss": 0.9427, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5745721271393643, |
|
"grad_norm": 0.8962105512619019, |
|
"learning_rate": 7.12836185819071e-06, |
|
"loss": 0.9191, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5770171149144254, |
|
"grad_norm": 0.9211586713790894, |
|
"learning_rate": 7.116136919315404e-06, |
|
"loss": 0.933, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5794621026894865, |
|
"grad_norm": 0.9243327975273132, |
|
"learning_rate": 7.103911980440098e-06, |
|
"loss": 0.9152, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5819070904645477, |
|
"grad_norm": 0.851266086101532, |
|
"learning_rate": 7.091687041564793e-06, |
|
"loss": 0.9697, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5843520782396088, |
|
"grad_norm": 0.8055517673492432, |
|
"learning_rate": 7.079462102689487e-06, |
|
"loss": 0.921, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.58679706601467, |
|
"grad_norm": 0.9354420900344849, |
|
"learning_rate": 7.067237163814181e-06, |
|
"loss": 0.9368, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.589242053789731, |
|
"grad_norm": 0.8339366912841797, |
|
"learning_rate": 7.055012224938876e-06, |
|
"loss": 0.9387, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5916870415647921, |
|
"grad_norm": 0.8128229975700378, |
|
"learning_rate": 7.04278728606357e-06, |
|
"loss": 0.9462, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5941320293398533, |
|
"grad_norm": 0.7740962505340576, |
|
"learning_rate": 7.030562347188264e-06, |
|
"loss": 0.931, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5965770171149144, |
|
"grad_norm": 0.8874317407608032, |
|
"learning_rate": 7.018337408312959e-06, |
|
"loss": 0.9809, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5990220048899756, |
|
"grad_norm": 0.8482634425163269, |
|
"learning_rate": 7.006112469437653e-06, |
|
"loss": 0.9104, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6014669926650367, |
|
"grad_norm": 0.7957248687744141, |
|
"learning_rate": 6.993887530562348e-06, |
|
"loss": 0.9195, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6039119804400978, |
|
"grad_norm": 0.8182454109191895, |
|
"learning_rate": 6.981662591687042e-06, |
|
"loss": 0.9565, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6063569682151589, |
|
"grad_norm": 0.7790459990501404, |
|
"learning_rate": 6.969437652811737e-06, |
|
"loss": 0.9114, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.60880195599022, |
|
"grad_norm": 0.7870607376098633, |
|
"learning_rate": 6.957212713936431e-06, |
|
"loss": 0.9372, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6112469437652812, |
|
"grad_norm": 0.8847014307975769, |
|
"learning_rate": 6.944987775061125e-06, |
|
"loss": 0.9344, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6136919315403423, |
|
"grad_norm": 0.8556163907051086, |
|
"learning_rate": 6.93276283618582e-06, |
|
"loss": 0.915, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6161369193154034, |
|
"grad_norm": 0.846760630607605, |
|
"learning_rate": 6.9205378973105144e-06, |
|
"loss": 0.9301, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6185819070904646, |
|
"grad_norm": 0.7853952646255493, |
|
"learning_rate": 6.908312958435208e-06, |
|
"loss": 0.9395, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6210268948655256, |
|
"grad_norm": 0.8032101392745972, |
|
"learning_rate": 6.896088019559902e-06, |
|
"loss": 0.9561, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6234718826405868, |
|
"grad_norm": 0.7820572853088379, |
|
"learning_rate": 6.883863080684598e-06, |
|
"loss": 0.9167, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6259168704156479, |
|
"grad_norm": 0.8646982908248901, |
|
"learning_rate": 6.8716381418092915e-06, |
|
"loss": 0.9297, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.628361858190709, |
|
"grad_norm": 0.8482604026794434, |
|
"learning_rate": 6.859413202933985e-06, |
|
"loss": 0.9254, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6308068459657702, |
|
"grad_norm": 0.775800883769989, |
|
"learning_rate": 6.847188264058681e-06, |
|
"loss": 0.9346, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6332518337408313, |
|
"grad_norm": 0.7460722327232361, |
|
"learning_rate": 6.834963325183375e-06, |
|
"loss": 0.9545, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6356968215158925, |
|
"grad_norm": 0.8128139972686768, |
|
"learning_rate": 6.8227383863080685e-06, |
|
"loss": 0.9262, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6381418092909535, |
|
"grad_norm": 0.8368150591850281, |
|
"learning_rate": 6.810513447432764e-06, |
|
"loss": 0.9123, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6405867970660146, |
|
"grad_norm": 0.7825431227684021, |
|
"learning_rate": 6.798288508557458e-06, |
|
"loss": 0.9336, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6430317848410758, |
|
"grad_norm": 0.7432078123092651, |
|
"learning_rate": 6.786063569682152e-06, |
|
"loss": 0.9396, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6454767726161369, |
|
"grad_norm": 0.6994110345840454, |
|
"learning_rate": 6.773838630806846e-06, |
|
"loss": 0.9527, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6479217603911981, |
|
"grad_norm": 0.8581116795539856, |
|
"learning_rate": 6.761613691931541e-06, |
|
"loss": 0.8979, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6503667481662592, |
|
"grad_norm": 0.7824479937553406, |
|
"learning_rate": 6.749388753056235e-06, |
|
"loss": 0.93, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6528117359413202, |
|
"grad_norm": 0.7407302260398865, |
|
"learning_rate": 6.7371638141809295e-06, |
|
"loss": 0.9175, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6552567237163814, |
|
"grad_norm": 0.8635402917861938, |
|
"learning_rate": 6.724938875305624e-06, |
|
"loss": 0.951, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6577017114914425, |
|
"grad_norm": 0.7739470601081848, |
|
"learning_rate": 6.712713936430319e-06, |
|
"loss": 0.9199, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6601466992665037, |
|
"grad_norm": 0.879205048084259, |
|
"learning_rate": 6.700488997555013e-06, |
|
"loss": 0.9296, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6625916870415648, |
|
"grad_norm": 0.7694469094276428, |
|
"learning_rate": 6.688264058679707e-06, |
|
"loss": 0.9312, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6650366748166259, |
|
"grad_norm": 0.8447741270065308, |
|
"learning_rate": 6.676039119804402e-06, |
|
"loss": 0.9486, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6674816625916871, |
|
"grad_norm": 0.7987990379333496, |
|
"learning_rate": 6.663814180929096e-06, |
|
"loss": 0.9554, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6699266503667481, |
|
"grad_norm": 0.8286950588226318, |
|
"learning_rate": 6.65158924205379e-06, |
|
"loss": 0.9264, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6723716381418093, |
|
"grad_norm": 0.8135730028152466, |
|
"learning_rate": 6.639364303178485e-06, |
|
"loss": 0.9183, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6748166259168704, |
|
"grad_norm": 0.9580305218696594, |
|
"learning_rate": 6.627139364303179e-06, |
|
"loss": 0.9055, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6772616136919315, |
|
"grad_norm": 0.7580097317695618, |
|
"learning_rate": 6.614914425427873e-06, |
|
"loss": 0.923, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6797066014669927, |
|
"grad_norm": 0.7250223755836487, |
|
"learning_rate": 6.602689486552568e-06, |
|
"loss": 0.9472, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6821515892420538, |
|
"grad_norm": 0.720930278301239, |
|
"learning_rate": 6.590464547677262e-06, |
|
"loss": 0.9155, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.684596577017115, |
|
"grad_norm": 0.7639275789260864, |
|
"learning_rate": 6.578239608801956e-06, |
|
"loss": 0.917, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.687041564792176, |
|
"grad_norm": 0.8890448808670044, |
|
"learning_rate": 6.5660146699266516e-06, |
|
"loss": 0.933, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6894865525672371, |
|
"grad_norm": 0.816336452960968, |
|
"learning_rate": 6.553789731051345e-06, |
|
"loss": 0.9606, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6919315403422983, |
|
"grad_norm": 0.8258111476898193, |
|
"learning_rate": 6.541564792176039e-06, |
|
"loss": 0.9331, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6943765281173594, |
|
"grad_norm": 0.8009893894195557, |
|
"learning_rate": 6.529339853300734e-06, |
|
"loss": 0.918, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6968215158924206, |
|
"grad_norm": 0.861232578754425, |
|
"learning_rate": 6.517114914425429e-06, |
|
"loss": 0.9157, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6992665036674817, |
|
"grad_norm": 0.8324180841445923, |
|
"learning_rate": 6.504889975550122e-06, |
|
"loss": 0.9322, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7017114914425427, |
|
"grad_norm": 0.8205760717391968, |
|
"learning_rate": 6.492665036674817e-06, |
|
"loss": 0.9401, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7041564792176039, |
|
"grad_norm": 0.8924916386604309, |
|
"learning_rate": 6.480440097799512e-06, |
|
"loss": 0.9165, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.706601466992665, |
|
"grad_norm": 0.7587029337882996, |
|
"learning_rate": 6.468215158924206e-06, |
|
"loss": 0.9127, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7090464547677262, |
|
"grad_norm": 0.7448475360870361, |
|
"learning_rate": 6.4559902200489e-06, |
|
"loss": 0.9366, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7114914425427873, |
|
"grad_norm": 0.8168658018112183, |
|
"learning_rate": 6.443765281173595e-06, |
|
"loss": 0.9377, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7139364303178484, |
|
"grad_norm": 0.7668200731277466, |
|
"learning_rate": 6.431540342298289e-06, |
|
"loss": 0.9342, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7163814180929096, |
|
"grad_norm": 0.8051294088363647, |
|
"learning_rate": 6.4193154034229834e-06, |
|
"loss": 0.9236, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7188264058679706, |
|
"grad_norm": 0.7371141910552979, |
|
"learning_rate": 6.407090464547677e-06, |
|
"loss": 0.9123, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7212713936430318, |
|
"grad_norm": 0.7507117390632629, |
|
"learning_rate": 6.394865525672373e-06, |
|
"loss": 0.8978, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7237163814180929, |
|
"grad_norm": 0.8171530365943909, |
|
"learning_rate": 6.382640586797067e-06, |
|
"loss": 0.9516, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.726161369193154, |
|
"grad_norm": 0.8637788891792297, |
|
"learning_rate": 6.3704156479217605e-06, |
|
"loss": 0.8987, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7286063569682152, |
|
"grad_norm": 0.7505759596824646, |
|
"learning_rate": 6.358190709046456e-06, |
|
"loss": 0.9134, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7310513447432763, |
|
"grad_norm": 0.9182484149932861, |
|
"learning_rate": 6.34596577017115e-06, |
|
"loss": 0.9351, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7334963325183375, |
|
"grad_norm": 1.006665825843811, |
|
"learning_rate": 6.333740831295844e-06, |
|
"loss": 0.9171, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7334963325183375, |
|
"eval_loss": 0.9244844913482666, |
|
"eval_runtime": 795.2171, |
|
"eval_samples_per_second": 18.287, |
|
"eval_steps_per_second": 0.572, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 8180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.914805497032868e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|