{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.899028305872413,
  "eval_steps": 500,
  "global_step": 2205,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03379805661174482,
      "grad_norm": 1.3233964443206787,
      "learning_rate": 4.5248868778280546e-07,
      "loss": 3.0504,
      "step": 5
    },
    {
      "epoch": 0.06759611322348964,
      "grad_norm": 1.3710697889328003,
      "learning_rate": 9.049773755656109e-07,
      "loss": 3.1346,
      "step": 10
    },
    {
      "epoch": 0.10139416983523447,
      "grad_norm": 1.6499178409576416,
      "learning_rate": 1.3574660633484164e-06,
      "loss": 3.0897,
      "step": 15
    },
    {
      "epoch": 0.13519222644697929,
      "grad_norm": 1.1821593046188354,
      "learning_rate": 1.8099547511312218e-06,
      "loss": 3.0632,
      "step": 20
    },
    {
      "epoch": 0.16899028305872413,
      "grad_norm": 1.2189937829971313,
      "learning_rate": 2.2624434389140273e-06,
      "loss": 3.1182,
      "step": 25
    },
    {
      "epoch": 0.20278833967046894,
      "grad_norm": 1.5012513399124146,
      "learning_rate": 2.7149321266968327e-06,
      "loss": 2.9984,
      "step": 30
    },
    {
      "epoch": 0.23658639628221378,
      "grad_norm": 1.459802508354187,
      "learning_rate": 3.167420814479638e-06,
      "loss": 3.141,
      "step": 35
    },
    {
      "epoch": 0.27038445289395857,
      "grad_norm": 1.2144436836242676,
      "learning_rate": 3.6199095022624436e-06,
      "loss": 2.8889,
      "step": 40
    },
    {
      "epoch": 0.3041825095057034,
      "grad_norm": 1.1169312000274658,
      "learning_rate": 4.072398190045249e-06,
      "loss": 2.8316,
      "step": 45
    },
    {
      "epoch": 0.33798056611744826,
      "grad_norm": 2.0920190811157227,
      "learning_rate": 4.5248868778280546e-06,
      "loss": 2.8712,
      "step": 50
    },
    {
      "epoch": 0.3717786227291931,
      "grad_norm": 2.2176926136016846,
      "learning_rate": 4.97737556561086e-06,
      "loss": 2.7896,
      "step": 55
    },
    {
      "epoch": 0.4055766793409379,
      "grad_norm": 3.200904369354248,
      "learning_rate": 5.4298642533936655e-06,
      "loss": 2.7888,
      "step": 60
    },
    {
      "epoch": 0.4393747359526827,
      "grad_norm": 1.8033865690231323,
      "learning_rate": 5.882352941176471e-06,
      "loss": 2.4676,
      "step": 65
    },
    {
      "epoch": 0.47317279256442757,
      "grad_norm": 1.055401086807251,
      "learning_rate": 6.334841628959276e-06,
      "loss": 2.4417,
      "step": 70
    },
    {
      "epoch": 0.5069708491761724,
      "grad_norm": 1.2879326343536377,
      "learning_rate": 6.787330316742083e-06,
      "loss": 2.4093,
      "step": 75
    },
    {
      "epoch": 0.5407689057879171,
      "grad_norm": 1.1458905935287476,
      "learning_rate": 7.239819004524887e-06,
      "loss": 2.3892,
      "step": 80
    },
    {
      "epoch": 0.574566962399662,
      "grad_norm": 1.0109455585479736,
      "learning_rate": 7.692307692307694e-06,
      "loss": 2.2264,
      "step": 85
    },
    {
      "epoch": 0.6083650190114068,
      "grad_norm": 1.0762925148010254,
      "learning_rate": 8.144796380090498e-06,
      "loss": 2.1744,
      "step": 90
    },
    {
      "epoch": 0.6421630756231517,
      "grad_norm": 1.1063696146011353,
      "learning_rate": 8.597285067873304e-06,
      "loss": 2.1335,
      "step": 95
    },
    {
      "epoch": 0.6759611322348965,
      "grad_norm": 1.0435007810592651,
      "learning_rate": 9.049773755656109e-06,
      "loss": 2.0953,
      "step": 100
    },
    {
      "epoch": 0.7097591888466414,
      "grad_norm": 1.2886987924575806,
      "learning_rate": 9.502262443438914e-06,
      "loss": 2.0159,
      "step": 105
    },
    {
      "epoch": 0.7435572454583862,
      "grad_norm": 1.2186506986618042,
      "learning_rate": 9.95475113122172e-06,
      "loss": 1.9618,
      "step": 110
    },
    {
      "epoch": 0.7773553020701309,
      "grad_norm": 1.1026384830474854,
      "learning_rate": 1.0407239819004526e-05,
      "loss": 1.9636,
      "step": 115
    },
    {
      "epoch": 0.8111533586818758,
      "grad_norm": 1.2348583936691284,
      "learning_rate": 1.0859728506787331e-05,
      "loss": 1.9061,
      "step": 120
    },
    {
      "epoch": 0.8449514152936206,
      "grad_norm": 1.2891064882278442,
      "learning_rate": 1.1312217194570137e-05,
      "loss": 1.8485,
      "step": 125
    },
    {
      "epoch": 0.8787494719053655,
      "grad_norm": 1.1830450296401978,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 1.8878,
      "step": 130
    },
    {
      "epoch": 0.9125475285171103,
      "grad_norm": 1.3732470273971558,
      "learning_rate": 1.2217194570135748e-05,
      "loss": 1.8483,
      "step": 135
    },
    {
      "epoch": 0.9463455851288551,
      "grad_norm": 1.3864206075668335,
      "learning_rate": 1.2669683257918553e-05,
      "loss": 1.6419,
      "step": 140
    },
    {
      "epoch": 0.9801436417405999,
      "grad_norm": 1.529963493347168,
      "learning_rate": 1.3122171945701359e-05,
      "loss": 1.6966,
      "step": 145
    },
    {
      "epoch": 1.0135192226446978,
      "grad_norm": 1.531701683998108,
      "learning_rate": 1.3574660633484165e-05,
      "loss": 1.7206,
      "step": 150
    },
    {
      "epoch": 1.0473172792564427,
      "grad_norm": 1.2663397789001465,
      "learning_rate": 1.4027149321266968e-05,
      "loss": 1.5889,
      "step": 155
    },
    {
      "epoch": 1.0811153358681875,
      "grad_norm": 1.1123305559158325,
      "learning_rate": 1.4479638009049775e-05,
      "loss": 1.5019,
      "step": 160
    },
    {
      "epoch": 1.1149133924799324,
      "grad_norm": 1.5772109031677246,
      "learning_rate": 1.4932126696832581e-05,
      "loss": 1.5866,
      "step": 165
    },
    {
      "epoch": 1.1487114490916772,
      "grad_norm": 1.6601964235305786,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 1.4743,
      "step": 170
    },
    {
      "epoch": 1.182509505703422,
      "grad_norm": 1.7369259595870972,
      "learning_rate": 1.5837104072398192e-05,
      "loss": 1.4523,
      "step": 175
    },
    {
      "epoch": 1.216307562315167,
      "grad_norm": 1.5128813982009888,
      "learning_rate": 1.6289592760180996e-05,
      "loss": 1.5253,
      "step": 180
    },
    {
      "epoch": 1.2501056189269117,
      "grad_norm": 1.5346794128417969,
      "learning_rate": 1.6742081447963804e-05,
      "loss": 1.5044,
      "step": 185
    },
    {
      "epoch": 1.2839036755386566,
      "grad_norm": 1.4628633260726929,
      "learning_rate": 1.719457013574661e-05,
      "loss": 1.2012,
      "step": 190
    },
    {
      "epoch": 1.3177017321504014,
      "grad_norm": 1.670690655708313,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 1.3808,
      "step": 195
    },
    {
      "epoch": 1.3514997887621463,
      "grad_norm": 1.7677953243255615,
      "learning_rate": 1.8099547511312218e-05,
      "loss": 1.2181,
      "step": 200
    },
    {
      "epoch": 1.385297845373891,
      "grad_norm": 1.5957576036453247,
      "learning_rate": 1.8552036199095026e-05,
      "loss": 1.246,
      "step": 205
    },
    {
      "epoch": 1.419095901985636,
      "grad_norm": 2.146883010864258,
      "learning_rate": 1.9004524886877827e-05,
      "loss": 1.247,
      "step": 210
    },
    {
      "epoch": 1.4528939585973806,
      "grad_norm": 1.9484453201293945,
      "learning_rate": 1.9457013574660635e-05,
      "loss": 1.2372,
      "step": 215
    },
    {
      "epoch": 1.4866920152091254,
      "grad_norm": 2.067981243133545,
      "learning_rate": 1.990950226244344e-05,
      "loss": 1.0892,
      "step": 220
    },
    {
      "epoch": 1.5204900718208703,
      "grad_norm": 2.743396043777466,
      "learning_rate": 1.9999799412001547e-05,
      "loss": 1.0867,
      "step": 225
    },
    {
      "epoch": 1.554288128432615,
      "grad_norm": 1.6774858236312866,
      "learning_rate": 1.9998984537049476e-05,
      "loss": 0.9716,
      "step": 230
    },
    {
      "epoch": 1.58808618504436,
      "grad_norm": 2.1075453758239746,
      "learning_rate": 1.9997542889433917e-05,
      "loss": 1.0524,
      "step": 235
    },
    {
      "epoch": 1.6218842416561048,
      "grad_norm": 2.1486105918884277,
      "learning_rate": 1.9995474559522576e-05,
      "loss": 0.996,
      "step": 240
    },
    {
      "epoch": 1.6556822982678496,
      "grad_norm": 1.7104390859603882,
      "learning_rate": 1.9992779676965884e-05,
      "loss": 1.0356,
      "step": 245
    },
    {
      "epoch": 1.6894803548795945,
      "grad_norm": 2.395637035369873,
      "learning_rate": 1.9989458410688865e-05,
      "loss": 1.0114,
      "step": 250
    },
    {
      "epoch": 1.7232784114913393,
      "grad_norm": 2.096191644668579,
      "learning_rate": 1.9985510968880555e-05,
      "loss": 1.0029,
      "step": 255
    },
    {
      "epoch": 1.757076468103084,
      "grad_norm": 3.1734085083007812,
      "learning_rate": 1.9980937598980943e-05,
      "loss": 0.9794,
      "step": 260
    },
    {
      "epoch": 1.790874524714829,
      "grad_norm": 2.1142361164093018,
      "learning_rate": 1.9975738587665455e-05,
      "loss": 1.0681,
      "step": 265
    },
    {
      "epoch": 1.8246725813265736,
      "grad_norm": 2.51534104347229,
      "learning_rate": 1.996991426082701e-05,
      "loss": 0.964,
      "step": 270
    },
    {
      "epoch": 1.8584706379383187,
      "grad_norm": 2.2418782711029053,
      "learning_rate": 1.9963464983555557e-05,
      "loss": 0.9054,
      "step": 275
    },
    {
      "epoch": 1.8922686945500633,
      "grad_norm": 2.073915958404541,
      "learning_rate": 1.9956391160115224e-05,
      "loss": 0.8698,
      "step": 280
    },
    {
      "epoch": 1.9260667511618081,
      "grad_norm": 2.290095806121826,
      "learning_rate": 1.994869323391895e-05,
      "loss": 0.9558,
      "step": 285
    },
    {
      "epoch": 1.959864807773553,
      "grad_norm": 2.205028533935547,
      "learning_rate": 1.9940371687500713e-05,
      "loss": 0.8184,
      "step": 290
    },
    {
      "epoch": 1.9936628643852978,
      "grad_norm": 3.1502742767333984,
      "learning_rate": 1.9931427042485252e-05,
      "loss": 1.11,
      "step": 295
    },
    {
      "epoch": 2.0270384452893957,
      "grad_norm": 2.171036958694458,
      "learning_rate": 1.992185985955541e-05,
      "loss": 0.7225,
      "step": 300
    },
    {
      "epoch": 2.0608365019011408,
      "grad_norm": 2.4927501678466797,
      "learning_rate": 1.991167073841695e-05,
      "loss": 1.0667,
      "step": 305
    },
    {
      "epoch": 2.0946345585128854,
      "grad_norm": 2.1560094356536865,
      "learning_rate": 1.990086031776099e-05,
      "loss": 0.7699,
      "step": 310
    },
    {
      "epoch": 2.1284326151246304,
      "grad_norm": 2.2326228618621826,
      "learning_rate": 1.9889429275223958e-05,
      "loss": 0.8313,
      "step": 315
    },
    {
      "epoch": 2.162230671736375,
      "grad_norm": 2.2837958335876465,
      "learning_rate": 1.9877378327345115e-05,
      "loss": 0.8124,
      "step": 320
    },
    {
      "epoch": 2.19602872834812,
      "grad_norm": 2.033587694168091,
      "learning_rate": 1.9864708229521637e-05,
      "loss": 0.7758,
      "step": 325
    },
    {
      "epoch": 2.2298267849598647,
      "grad_norm": 2.212913990020752,
      "learning_rate": 1.9851419775961265e-05,
      "loss": 0.6772,
      "step": 330
    },
    {
      "epoch": 2.26362484157161,
      "grad_norm": 2.4761927127838135,
      "learning_rate": 1.9837513799632536e-05,
      "loss": 0.6488,
      "step": 335
    },
    {
      "epoch": 2.2974228981833544,
      "grad_norm": 2.0889394283294678,
      "learning_rate": 1.982299117221254e-05,
      "loss": 0.6567,
      "step": 340
    },
    {
      "epoch": 2.331220954795099,
      "grad_norm": 1.7731271982192993,
      "learning_rate": 1.9807852804032306e-05,
      "loss": 0.5775,
      "step": 345
    },
    {
      "epoch": 2.365019011406844,
      "grad_norm": 2.9344234466552734,
      "learning_rate": 1.979209964401973e-05,
      "loss": 0.711,
      "step": 350
    },
    {
      "epoch": 2.3988170680185887,
      "grad_norm": 2.7177066802978516,
      "learning_rate": 1.9775732679640093e-05,
      "loss": 0.5417,
      "step": 355
    },
    {
      "epoch": 2.432615124630334,
      "grad_norm": 1.9662398099899292,
      "learning_rate": 1.975875293683416e-05,
      "loss": 0.7523,
      "step": 360
    },
    {
      "epoch": 2.4664131812420784,
      "grad_norm": 2.3415379524230957,
      "learning_rate": 1.9741161479953872e-05,
      "loss": 0.5889,
      "step": 365
    },
    {
      "epoch": 2.5002112378538235,
      "grad_norm": 2.181759834289551,
      "learning_rate": 1.9722959411695636e-05,
      "loss": 0.6306,
      "step": 370
    },
    {
      "epoch": 2.534009294465568,
      "grad_norm": 2.549531936645508,
      "learning_rate": 1.970414787303119e-05,
      "loss": 0.7365,
      "step": 375
    },
    {
      "epoch": 2.567807351077313,
      "grad_norm": 2.4441099166870117,
      "learning_rate": 1.9684728043136093e-05,
      "loss": 0.6466,
      "step": 380
    },
    {
      "epoch": 2.601605407689058,
      "grad_norm": 2.6284027099609375,
      "learning_rate": 1.966470113931582e-05,
      "loss": 0.6229,
      "step": 385
    },
    {
      "epoch": 2.635403464300803,
      "grad_norm": 2.808634042739868,
      "learning_rate": 1.9644068416929417e-05,
      "loss": 0.6366,
      "step": 390
    },
    {
      "epoch": 2.6692015209125475,
      "grad_norm": 2.9602878093719482,
      "learning_rate": 1.9622831169310864e-05,
      "loss": 0.6766,
      "step": 395
    },
    {
      "epoch": 2.7029995775242925,
      "grad_norm": 3.1014065742492676,
      "learning_rate": 1.9600990727687964e-05,
      "loss": 0.7399,
      "step": 400
    },
    {
      "epoch": 2.736797634136037,
      "grad_norm": 1.8795526027679443,
      "learning_rate": 1.9578548461098912e-05,
      "loss": 0.635,
      "step": 405
    },
    {
      "epoch": 2.770595690747782,
      "grad_norm": 2.7618279457092285,
      "learning_rate": 1.9555505776306492e-05,
      "loss": 0.6349,
      "step": 410
    },
    {
      "epoch": 2.804393747359527,
      "grad_norm": 2.2114012241363525,
      "learning_rate": 1.9531864117709855e-05,
      "loss": 0.5364,
      "step": 415
    },
    {
      "epoch": 2.838191803971272,
      "grad_norm": 2.5741758346557617,
      "learning_rate": 1.950762496725403e-05,
      "loss": 0.6202,
      "step": 420
    },
    {
      "epoch": 2.8719898605830165,
      "grad_norm": 2.607922077178955,
      "learning_rate": 1.948278984433699e-05,
      "loss": 0.703,
      "step": 425
    },
    {
      "epoch": 2.905787917194761,
      "grad_norm": 2.70082688331604,
      "learning_rate": 1.945736030571443e-05,
      "loss": 0.5793,
      "step": 430
    },
    {
      "epoch": 2.939585973806506,
      "grad_norm": 2.543623447418213,
      "learning_rate": 1.9431337945402186e-05,
      "loss": 0.488,
      "step": 435
    },
    {
      "epoch": 2.973384030418251,
      "grad_norm": 2.2461986541748047,
      "learning_rate": 1.9404724394576305e-05,
      "loss": 0.561,
      "step": 440
    },
    {
      "epoch": 3.006759611322349,
      "grad_norm": 2.3420913219451904,
      "learning_rate": 1.9377521321470806e-05,
      "loss": 0.5422,
      "step": 445
    },
    {
      "epoch": 3.040557667934094,
      "grad_norm": 2.485952854156494,
      "learning_rate": 1.93497304312731e-05,
      "loss": 0.4076,
      "step": 450
    },
    {
      "epoch": 3.0743557245458386,
      "grad_norm": 2.928043842315674,
      "learning_rate": 1.932135346601711e-05,
      "loss": 0.4619,
      "step": 455
    },
    {
      "epoch": 3.1081537811575832,
      "grad_norm": 3.641951084136963,
      "learning_rate": 1.9292392204474075e-05,
      "loss": 0.649,
      "step": 460
    },
    {
      "epoch": 3.1419518377693283,
      "grad_norm": 2.47162127494812,
      "learning_rate": 1.9262848462041046e-05,
      "loss": 0.4297,
      "step": 465
    },
    {
      "epoch": 3.175749894381073,
      "grad_norm": 2.943067789077759,
      "learning_rate": 1.923272409062709e-05,
      "loss": 0.5152,
      "step": 470
    },
    {
      "epoch": 3.209547950992818,
      "grad_norm": 3.1264185905456543,
      "learning_rate": 1.920202097853721e-05,
      "loss": 0.5389,
      "step": 475
    },
    {
      "epoch": 3.2433460076045626,
      "grad_norm": 2.739868402481079,
      "learning_rate": 1.917074105035397e-05,
      "loss": 0.5507,
      "step": 480
    },
    {
      "epoch": 3.2771440642163077,
      "grad_norm": 2.510500907897949,
      "learning_rate": 1.9138886266816868e-05,
      "loss": 0.4332,
      "step": 485
    },
    {
      "epoch": 3.3109421208280523,
      "grad_norm": 2.6104397773742676,
      "learning_rate": 1.9106458624699425e-05,
      "loss": 0.6674,
      "step": 490
    },
    {
      "epoch": 3.3447401774397973,
      "grad_norm": 2.4239916801452637,
      "learning_rate": 1.907346015668401e-05,
      "loss": 0.4281,
      "step": 495
    },
    {
      "epoch": 3.378538234051542,
      "grad_norm": 3.4318349361419678,
      "learning_rate": 1.9039892931234434e-05,
      "loss": 0.499,
      "step": 500
    },
    {
      "epoch": 3.412336290663287,
      "grad_norm": 2.174170970916748,
      "learning_rate": 1.9005759052466303e-05,
      "loss": 0.415,
      "step": 505
    },
    {
      "epoch": 3.4461343472750317,
      "grad_norm": 2.9727699756622314,
      "learning_rate": 1.897106066001509e-05,
      "loss": 0.5141,
      "step": 510
    },
    {
      "epoch": 3.4799324038867763,
      "grad_norm": 3.2721505165100098,
      "learning_rate": 1.8935799928902046e-05,
      "loss": 0.5301,
      "step": 515
    },
    {
      "epoch": 3.5137304604985213,
      "grad_norm": 2.574387311935425,
      "learning_rate": 1.8899979069397858e-05,
      "loss": 0.4762,
      "step": 520
    },
    {
      "epoch": 3.5475285171102664,
      "grad_norm": 3.4737517833709717,
      "learning_rate": 1.8863600326884085e-05,
      "loss": 0.3734,
      "step": 525
    },
    {
      "epoch": 3.581326573722011,
      "grad_norm": 3.07442045211792,
      "learning_rate": 1.882666598171242e-05,
      "loss": 0.6237,
      "step": 530
    },
    {
      "epoch": 3.6151246303337556,
      "grad_norm": 2.256251573562622,
      "learning_rate": 1.8789178349061755e-05,
      "loss": 0.4447,
      "step": 535
    },
    {
      "epoch": 3.6489226869455007,
      "grad_norm": 2.587085008621216,
      "learning_rate": 1.8751139778793043e-05,
      "loss": 0.4351,
      "step": 540
    },
    {
      "epoch": 3.6827207435572453,
      "grad_norm": 2.929131507873535,
      "learning_rate": 1.871255265530201e-05,
      "loss": 0.4799,
      "step": 545
    },
    {
      "epoch": 3.7165188001689904,
      "grad_norm": 2.4406521320343018,
      "learning_rate": 1.8673419397369693e-05,
      "loss": 0.3568,
      "step": 550
    },
    {
      "epoch": 3.750316856780735,
      "grad_norm": 3.0470123291015625,
      "learning_rate": 1.863374245801082e-05,
      "loss": 0.535,
      "step": 555
    },
    {
      "epoch": 3.78411491339248,
      "grad_norm": 1.8549753427505493,
      "learning_rate": 1.8593524324320035e-05,
      "loss": 0.3995,
      "step": 560
    },
    {
      "epoch": 3.8179129700042247,
      "grad_norm": 3.1754539012908936,
      "learning_rate": 1.855276751731602e-05,
      "loss": 0.4495,
      "step": 565
    },
    {
      "epoch": 3.8517110266159698,
      "grad_norm": 2.436633825302124,
      "learning_rate": 1.8511474591783454e-05,
      "loss": 0.4472,
      "step": 570
    },
    {
      "epoch": 3.8855090832277144,
      "grad_norm": 2.15982985496521,
      "learning_rate": 1.8469648136112867e-05,
      "loss": 0.5069,
      "step": 575
    },
    {
      "epoch": 3.919307139839459,
      "grad_norm": 2.9572179317474365,
      "learning_rate": 1.8427290772138397e-05,
      "loss": 0.4933,
      "step": 580
    },
    {
      "epoch": 3.953105196451204,
      "grad_norm": 2.8051276206970215,
      "learning_rate": 1.838440515497345e-05,
      "loss": 0.39,
      "step": 585
    },
    {
      "epoch": 3.986903253062949,
      "grad_norm": 3.173710823059082,
      "learning_rate": 1.8340993972844252e-05,
      "loss": 0.4061,
      "step": 590
    },
    {
      "epoch": 4.020278833967047,
      "grad_norm": 2.855424404144287,
      "learning_rate": 1.8297059946921357e-05,
      "loss": 0.3861,
      "step": 595
    },
    {
      "epoch": 4.054076890578791,
      "grad_norm": 2.7455055713653564,
      "learning_rate": 1.8252605831149052e-05,
      "loss": 0.3595,
      "step": 600
    },
    {
      "epoch": 4.087874947190537,
      "grad_norm": 3.2334115505218506,
      "learning_rate": 1.8207634412072765e-05,
      "loss": 0.346,
      "step": 605
    },
    {
      "epoch": 4.1216730038022815,
      "grad_norm": 2.810620069503784,
      "learning_rate": 1.816214850866436e-05,
      "loss": 0.4259,
      "step": 610
    },
    {
      "epoch": 4.155471060414026,
      "grad_norm": 2.4240875244140625,
      "learning_rate": 1.811615097214545e-05,
      "loss": 0.4007,
      "step": 615
    },
    {
      "epoch": 4.189269117025771,
      "grad_norm": 3.068871021270752,
      "learning_rate": 1.8069644685808673e-05,
      "loss": 0.2978,
      "step": 620
    },
    {
      "epoch": 4.223067173637516,
      "grad_norm": 2.1824235916137695,
      "learning_rate": 1.8022632564836948e-05,
      "loss": 0.3693,
      "step": 625
    },
    {
      "epoch": 4.256865230249261,
      "grad_norm": 3.2515642642974854,
      "learning_rate": 1.797511755612075e-05,
      "loss": 0.4717,
      "step": 630
    },
    {
      "epoch": 4.2906632868610055,
      "grad_norm": 3.5332624912261963,
      "learning_rate": 1.7927102638073384e-05,
      "loss": 0.4488,
      "step": 635
    },
    {
      "epoch": 4.32446134347275,
      "grad_norm": 2.8152706623077393,
      "learning_rate": 1.7878590820444283e-05,
      "loss": 0.3908,
      "step": 640
    },
    {
      "epoch": 4.358259400084495,
      "grad_norm": 3.03226375579834,
      "learning_rate": 1.7829585144130356e-05,
      "loss": 0.3771,
      "step": 645
    },
    {
      "epoch": 4.39205745669624,
      "grad_norm": 3.0809173583984375,
      "learning_rate": 1.7780088680985365e-05,
      "loss": 0.3708,
      "step": 650
    },
    {
      "epoch": 4.425855513307985,
      "grad_norm": 3.259047269821167,
      "learning_rate": 1.773010453362737e-05,
      "loss": 0.4393,
      "step": 655
    },
    {
      "epoch": 4.4596535699197295,
      "grad_norm": 2.542726993560791,
      "learning_rate": 1.7679635835244256e-05,
      "loss": 0.4462,
      "step": 660
    },
    {
      "epoch": 4.493451626531474,
      "grad_norm": 2.5668067932128906,
      "learning_rate": 1.762868574939732e-05,
      "loss": 0.3585,
      "step": 665
    },
    {
      "epoch": 4.52724968314322,
      "grad_norm": 2.9174365997314453,
      "learning_rate": 1.7577257469822976e-05,
      "loss": 0.3732,
      "step": 670
    },
    {
      "epoch": 4.561047739754964,
      "grad_norm": 2.1858620643615723,
      "learning_rate": 1.7525354220232558e-05,
      "loss": 0.4202,
      "step": 675
    },
    {
      "epoch": 4.594845796366709,
      "grad_norm": 3.092898368835449,
      "learning_rate": 1.747297925411024e-05,
      "loss": 0.4174,
      "step": 680
    },
    {
      "epoch": 4.6286438529784535,
      "grad_norm": 2.1292641162872314,
      "learning_rate": 1.742013585450911e-05,
      "loss": 0.2891,
      "step": 685
    },
    {
      "epoch": 4.662441909590198,
      "grad_norm": 3.4500226974487305,
      "learning_rate": 1.736682733384536e-05,
      "loss": 0.3446,
      "step": 690
    },
    {
      "epoch": 4.696239966201944,
      "grad_norm": 2.490712881088257,
      "learning_rate": 1.7313057033690662e-05,
      "loss": 0.273,
      "step": 695
    },
    {
      "epoch": 4.730038022813688,
      "grad_norm": 3.1903836727142334,
      "learning_rate": 1.7258828324562705e-05,
      "loss": 0.3976,
      "step": 700
    },
    {
      "epoch": 4.763836079425433,
      "grad_norm": 2.6504249572753906,
      "learning_rate": 1.7204144605713922e-05,
      "loss": 0.351,
      "step": 705
    },
    {
      "epoch": 4.7976341360371775,
      "grad_norm": 2.951176643371582,
      "learning_rate": 1.7149009304918392e-05,
      "loss": 0.3601,
      "step": 710
    },
    {
      "epoch": 4.831432192648923,
      "grad_norm": 4.028046131134033,
      "learning_rate": 1.7093425878257007e-05,
      "loss": 0.4412,
      "step": 715
    },
    {
      "epoch": 4.865230249260668,
      "grad_norm": 3.4209461212158203,
      "learning_rate": 1.7037397809900807e-05,
      "loss": 0.4239,
      "step": 720
    },
    {
      "epoch": 4.899028305872412,
      "grad_norm": 2.396829605102539,
      "learning_rate": 1.698092861189259e-05,
      "loss": 0.3325,
      "step": 725
    },
    {
      "epoch": 4.932826362484157,
      "grad_norm": 2.638688564300537,
      "learning_rate": 1.6924021823926766e-05,
      "loss": 0.3053,
      "step": 730
    },
    {
      "epoch": 4.966624419095902,
      "grad_norm": 3.0459437370300293,
      "learning_rate": 1.6866681013127466e-05,
      "loss": 0.2785,
      "step": 735
    },
    {
      "epoch": 5.0,
      "grad_norm": 4.051104545593262,
      "learning_rate": 1.6808909773824952e-05,
      "loss": 0.2148,
      "step": 740
    },
    {
      "epoch": 5.033798056611745,
      "grad_norm": 2.424513816833496,
      "learning_rate": 1.675071172733031e-05,
      "loss": 0.3102,
      "step": 745
    },
    {
      "epoch": 5.067596113223489,
      "grad_norm": 2.9347620010375977,
      "learning_rate": 1.669209052170845e-05,
      "loss": 0.2635,
      "step": 750
    },
    {
      "epoch": 5.101394169835235,
      "grad_norm": 2.5299954414367676,
      "learning_rate": 1.6633049831549424e-05,
      "loss": 0.2556,
      "step": 755
    },
    {
      "epoch": 5.135192226446979,
      "grad_norm": 3.3548402786254883,
      "learning_rate": 1.657359335773812e-05,
      "loss": 0.3626,
      "step": 760
    },
    {
      "epoch": 5.168990283058724,
      "grad_norm": 3.0583834648132324,
      "learning_rate": 1.6513724827222225e-05,
      "loss": 0.3778,
      "step": 765
    },
    {
      "epoch": 5.202788339670469,
      "grad_norm": 2.3884308338165283,
      "learning_rate": 1.645344799277866e-05,
      "loss": 0.3429,
      "step": 770
    },
    {
      "epoch": 5.236586396282214,
      "grad_norm": 3.5502490997314453,
      "learning_rate": 1.639276663277831e-05,
      "loss": 0.3531,
      "step": 775
    },
    {
      "epoch": 5.270384452893959,
      "grad_norm": 2.881547212600708,
      "learning_rate": 1.6331684550949197e-05,
      "loss": 0.2784,
      "step": 780
    },
    {
      "epoch": 5.304182509505703,
      "grad_norm": 2.110593795776367,
      "learning_rate": 1.627020557613803e-05,
      "loss": 0.3011,
      "step": 785
    },
    {
      "epoch": 5.337980566117448,
      "grad_norm": 3.2138075828552246,
      "learning_rate": 1.6208333562070232e-05,
      "loss": 0.3218,
      "step": 790
    },
    {
      "epoch": 5.3717786227291935,
      "grad_norm": 2.4348948001861572,
      "learning_rate": 1.614607238710833e-05,
      "loss": 0.2419,
      "step": 795
    },
    {
      "epoch": 5.405576679340938,
      "grad_norm": 3.6023876667022705,
      "learning_rate": 1.6083425954008883e-05,
      "loss": 0.3198,
      "step": 800
    },
    {
      "epoch": 5.439374735952683,
      "grad_norm": 3.171356201171875,
      "learning_rate": 1.602039818967783e-05,
      "loss": 0.3377,
      "step": 805
    },
    {
      "epoch": 5.473172792564427,
      "grad_norm": 2.926022529602051,
      "learning_rate": 1.5956993044924334e-05,
      "loss": 0.2398,
      "step": 810
    },
    {
      "epoch": 5.506970849176172,
      "grad_norm": 2.8738198280334473,
      "learning_rate": 1.589321449421313e-05,
      "loss": 0.2829,
      "step": 815
    },
    {
      "epoch": 5.5407689057879175,
      "grad_norm": 3.6972992420196533,
      "learning_rate": 1.5829066535415402e-05,
      "loss": 0.3569,
      "step": 820
    },
    {
      "epoch": 5.574566962399662,
      "grad_norm": 3.0152523517608643,
      "learning_rate": 1.576455318955816e-05,
      "loss": 0.2925,
      "step": 825
    },
    {
      "epoch": 5.608365019011407,
      "grad_norm": 2.8930368423461914,
      "learning_rate": 1.569967850057222e-05,
      "loss": 0.3363,
      "step": 830
    },
    {
      "epoch": 5.642163075623151,
      "grad_norm": 3.1284563541412354,
      "learning_rate": 1.5634446535038688e-05,
      "loss": 0.3218,
      "step": 835
    },
    {
      "epoch": 5.675961132234897,
      "grad_norm": 1.6916499137878418,
      "learning_rate": 1.556886138193406e-05,
      "loss": 0.2436,
      "step": 840
    },
    {
      "epoch": 5.7097591888466415,
      "grad_norm": 3.7334420680999756,
      "learning_rate": 1.5502927152373913e-05,
      "loss": 0.2874,
      "step": 845
    },
    {
      "epoch": 5.743557245458386,
      "grad_norm": 3.914621591567993,
      "learning_rate": 1.5436647979355214e-05,
      "loss": 0.2329,
      "step": 850
    },
    {
      "epoch": 5.777355302070131,
      "grad_norm": 3.38970685005188,
      "learning_rate": 1.5370028017497217e-05,
      "loss": 0.3232,
      "step": 855
    },
    {
      "epoch": 5.811153358681876,
      "grad_norm": 2.7700934410095215,
      "learning_rate": 1.5303071442781083e-05,
      "loss": 0.2951,
      "step": 860
    },
    {
      "epoch": 5.844951415293621,
      "grad_norm": 3.382173538208008,
      "learning_rate": 1.5235782452288068e-05,
      "loss": 0.2719,
      "step": 865
    },
    {
      "epoch": 5.8787494719053655,
      "grad_norm": 3.8175547122955322,
      "learning_rate": 1.5168165263936472e-05,
      "loss": 0.3171,
      "step": 870
    },
    {
      "epoch": 5.91254752851711,
      "grad_norm": 3.3271560668945312,
      "learning_rate": 1.5100224116217217e-05,
      "loss": 0.2364,
      "step": 875
    },
    {
      "epoch": 5.946345585128855,
      "grad_norm": 2.9731876850128174,
      "learning_rate": 1.5031963267928185e-05,
      "loss": 0.2103,
      "step": 880
    },
    {
      "epoch": 5.9801436417406,
      "grad_norm": 3.461787700653076,
      "learning_rate": 1.4963386997907242e-05,
      "loss": 0.341,
      "step": 885
    },
    {
      "epoch": 6.013519222644698,
      "grad_norm": 3.172473669052124,
      "learning_rate": 1.4894499604764035e-05,
      "loss": 0.2618,
      "step": 890
    },
    {
      "epoch": 6.0473172792564425,
      "grad_norm": 2.9784677028656006,
      "learning_rate": 1.4825305406610547e-05,
      "loss": 0.2903,
      "step": 895
    },
    {
      "epoch": 6.081115335868188,
      "grad_norm": 3.697354555130005,
      "learning_rate": 1.4755808740790403e-05,
      "loss": 0.2625,
      "step": 900
    },
    {
      "epoch": 6.114913392479933,
      "grad_norm": 3.192431926727295,
      "learning_rate": 1.4686013963607e-05,
      "loss": 0.233,
      "step": 905
    },
    {
      "epoch": 6.148711449091677,
      "grad_norm": 2.8318302631378174,
      "learning_rate": 1.4615925450050448e-05,
      "loss": 0.1387,
      "step": 910
    },
    {
      "epoch": 6.182509505703422,
      "grad_norm": 3.418325901031494,
      "learning_rate": 1.4545547593523308e-05,
      "loss": 0.3177,
      "step": 915
    },
    {
      "epoch": 6.2163075623151665,
      "grad_norm": 3.188663959503174,
      "learning_rate": 1.4474884805565217e-05,
      "loss": 0.2066,
      "step": 920
    },
    {
      "epoch": 6.250105618926912,
      "grad_norm": 2.2658884525299072,
      "learning_rate": 1.4403941515576344e-05,
      "loss": 0.2959,
      "step": 925
    },
    {
      "epoch": 6.283903675538657,
      "grad_norm": 2.798861265182495,
      "learning_rate": 1.4332722170539748e-05,
      "loss": 0.2784,
      "step": 930
    },
    {
      "epoch": 6.317701732150401,
      "grad_norm": 3.2030510902404785,
      "learning_rate": 1.4261231234742618e-05,
      "loss": 0.224,
      "step": 935
    },
    {
      "epoch": 6.351499788762146,
      "grad_norm": 3.1087892055511475,
      "learning_rate": 1.4189473189496437e-05,
      "loss": 0.271,
      "step": 940
    },
    {
      "epoch": 6.385297845373891,
      "grad_norm": 3.4298338890075684,
      "learning_rate": 1.4117452532856084e-05,
      "loss": 0.1972,
      "step": 945
    },
    {
      "epoch": 6.419095901985636,
      "grad_norm": 2.693760633468628,
      "learning_rate": 1.4045173779337866e-05,
      "loss": 0.3036,
      "step": 950
    },
    {
      "epoch": 6.452893958597381,
      "grad_norm": 3.6742842197418213,
      "learning_rate": 1.3972641459636548e-05,
      "loss": 0.276,
      "step": 955
    },
    {
      "epoch": 6.486692015209125,
      "grad_norm": 2.9099996089935303,
      "learning_rate": 1.3899860120341338e-05,
      "loss": 0.2841,
      "step": 960
    },
    {
      "epoch": 6.52049007182087,
      "grad_norm": 2.4859213829040527,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 0.2752,
      "step": 965
    },
    {
      "epoch": 6.554288128432615,
      "grad_norm": 2.6533761024475098,
      "learning_rate": 1.3753568647087372e-05,
      "loss": 0.212,
      "step": 970
    },
    {
      "epoch": 6.58808618504436,
      "grad_norm": 2.8711912631988525,
      "learning_rate": 1.3680067683209438e-05,
      "loss": 0.2039,
      "step": 975
    },
    {
      "epoch": 6.621884241656105,
      "grad_norm": 3.615388870239258,
      "learning_rate": 1.3606336039324439e-05,
      "loss": 0.1882,
      "step": 980
    },
    {
      "epoch": 6.65568229826785,
      "grad_norm": 2.813685655593872,
      "learning_rate": 1.353237833719958e-05,
      "loss": 0.2237,
      "step": 985
    },
    {
      "epoch": 6.689480354879595,
      "grad_norm": 3.288862466812134,
      "learning_rate": 1.3458199212772227e-05,
      "loss": 0.2177,
      "step": 990
    },
    {
      "epoch": 6.723278411491339,
      "grad_norm": 3.3833813667297363,
      "learning_rate": 1.3383803315859281e-05,
      "loss": 0.2406,
      "step": 995
    },
    {
      "epoch": 6.757076468103084,
      "grad_norm": 3.7307562828063965,
      "learning_rate": 1.3309195309865746e-05,
      "loss": 0.1924,
      "step": 1000
    },
    {
      "epoch": 6.7908745247148286,
      "grad_norm": 3.9301440715789795,
      "learning_rate": 1.3234379871492381e-05,
      "loss": 0.2912,
      "step": 1005
    },
    {
      "epoch": 6.824672581326574,
      "grad_norm": 1.9294644594192505,
      "learning_rate": 1.315936169044257e-05,
      "loss": 0.2257,
      "step": 1010
    },
    {
      "epoch": 6.858470637938319,
      "grad_norm": 3.4223814010620117,
      "learning_rate": 1.3084145469128343e-05,
      "loss": 0.2205,
      "step": 1015
    },
    {
      "epoch": 6.892268694550063,
      "grad_norm": 3.395117998123169,
      "learning_rate": 1.3008735922375607e-05,
      "loss": 0.2059,
      "step": 1020
    },
    {
      "epoch": 6.926066751161808,
      "grad_norm": 3.7277326583862305,
      "learning_rate": 1.2933137777128607e-05,
      "loss": 0.2599,
      "step": 1025
    },
    {
      "epoch": 6.9598648077735525,
      "grad_norm": 2.926193952560425,
      "learning_rate": 1.2857355772153637e-05,
      "loss": 0.2058,
      "step": 1030
    },
    {
      "epoch": 6.993662864385298,
      "grad_norm": 2.551806926727295,
      "learning_rate": 1.2781394657741988e-05,
      "loss": 0.3004,
      "step": 1035
    },
    {
      "epoch": 7.027038445289396,
      "grad_norm": 3.521486759185791,
      "learning_rate": 1.2705259195412168e-05,
      "loss": 0.1499,
      "step": 1040
    },
    {
      "epoch": 7.06083650190114,
      "grad_norm": 3.246941089630127,
      "learning_rate": 1.2628954157611449e-05,
      "loss": 0.2174,
      "step": 1045
    },
    {
      "epoch": 7.094634558512886,
      "grad_norm": 2.2454280853271484,
      "learning_rate": 1.255248432741672e-05,
      "loss": 0.1209,
      "step": 1050
    },
    {
      "epoch": 7.1284326151246304,
      "grad_norm": 2.4737725257873535,
      "learning_rate": 1.2475854498234647e-05,
      "loss": 0.1727,
      "step": 1055
    },
    {
      "epoch": 7.162230671736375,
      "grad_norm": 2.819976329803467,
      "learning_rate": 1.239906947350121e-05,
      "loss": 0.2555,
      "step": 1060
    },
    {
      "epoch": 7.19602872834812,
      "grad_norm": 2.772263765335083,
      "learning_rate": 1.2322134066380622e-05,
      "loss": 0.2112,
      "step": 1065
    },
    {
      "epoch": 7.229826784959865,
      "grad_norm": 3.721599817276001,
      "learning_rate": 1.22450530994636e-05,
      "loss": 0.3326,
      "step": 1070
    },
    {
      "epoch": 7.26362484157161,
      "grad_norm": 2.8285434246063232,
      "learning_rate": 1.2167831404465078e-05,
      "loss": 0.2237,
      "step": 1075
    },
    {
      "epoch": 7.297422898183354,
      "grad_norm": 3.2905073165893555,
      "learning_rate": 1.2090473821921343e-05,
      "loss": 0.1998,
      "step": 1080
    },
    {
      "epoch": 7.331220954795099,
      "grad_norm": 2.5703885555267334,
      "learning_rate": 1.2012985200886602e-05,
      "loss": 0.2402,
      "step": 1085
    },
    {
      "epoch": 7.365019011406844,
      "grad_norm": 3.2286860942840576,
      "learning_rate": 1.1935370398629033e-05,
      "loss": 0.1771,
      "step": 1090
    },
    {
      "epoch": 7.398817068018589,
      "grad_norm": 3.355846881866455,
      "learning_rate": 1.185763428032631e-05,
      "loss": 0.2184,
      "step": 1095
    },
    {
      "epoch": 7.432615124630334,
      "grad_norm": 2.6862475872039795,
      "learning_rate": 1.1779781718760641e-05,
      "loss": 0.212,
      "step": 1100
    },
    {
      "epoch": 7.466413181242078,
      "grad_norm": 3.8962576389312744,
      "learning_rate": 1.1701817594013312e-05,
      "loss": 0.214,
      "step": 1105
    },
    {
      "epoch": 7.500211237853823,
      "grad_norm": 3.2958405017852783,
      "learning_rate": 1.1623746793158803e-05,
      "loss": 0.2378,
      "step": 1110
    },
    {
      "epoch": 7.5340092944655686,
      "grad_norm": 2.6480026245117188,
      "learning_rate": 1.1545574209958433e-05,
      "loss": 0.1399,
      "step": 1115
    },
    {
      "epoch": 7.567807351077313,
      "grad_norm": 3.944840669631958,
      "learning_rate": 1.1467304744553618e-05,
      "loss": 0.2823,
      "step": 1120
    },
    {
      "epoch": 7.601605407689058,
      "grad_norm": 4.2091498374938965,
      "learning_rate": 1.1388943303158692e-05,
      "loss": 0.1703,
      "step": 1125
    },
    {
      "epoch": 7.635403464300802,
      "grad_norm": 4.504730701446533,
      "learning_rate": 1.1310494797753382e-05,
      "loss": 0.1969,
      "step": 1130
    },
    {
      "epoch": 7.669201520912548,
      "grad_norm": 3.6243932247161865,
      "learning_rate": 1.1231964145774906e-05,
      "loss": 0.2886,
      "step": 1135
    },
    {
      "epoch": 7.7029995775242925,
      "grad_norm": 3.16015887260437,
      "learning_rate": 1.1153356269809721e-05,
      "loss": 0.1156,
      "step": 1140
    },
    {
      "epoch": 7.736797634136037,
      "grad_norm": 3.0954883098602295,
      "learning_rate": 1.1074676097284973e-05,
      "loss": 0.1634,
      "step": 1145
    },
    {
      "epoch": 7.770595690747782,
      "grad_norm": 3.1873254776000977,
      "learning_rate": 1.0995928560159608e-05,
      "loss": 0.2507,
      "step": 1150
    },
    {
      "epoch": 7.804393747359526,
      "grad_norm": 3.6099650859832764,
      "learning_rate": 1.0917118594615237e-05,
      "loss": 0.2474,
      "step": 1155
    },
    {
      "epoch": 7.838191803971272,
      "grad_norm": 3.4526472091674805,
      "learning_rate": 1.0838251140746717e-05,
      "loss": 0.1501,
      "step": 1160
    },
    {
      "epoch": 7.8719898605830165,
      "grad_norm": 2.2834644317626953,
      "learning_rate": 1.0759331142252463e-05,
      "loss": 0.1648,
      "step": 1165
    },
    {
      "epoch": 7.905787917194761,
      "grad_norm": 3.0223686695098877,
      "learning_rate": 1.0680363546124599e-05,
      "loss": 0.1598,
      "step": 1170
    },
    {
      "epoch": 7.939585973806506,
      "grad_norm": 3.2281494140625,
      "learning_rate": 1.060135330233883e-05,
      "loss": 0.1681,
      "step": 1175
    },
    {
      "epoch": 7.973384030418251,
      "grad_norm": 3.3291306495666504,
      "learning_rate": 1.0522305363544172e-05,
      "loss": 0.1202,
      "step": 1180
    },
    {
      "epoch": 8.00675961132235,
      "grad_norm": 2.6950342655181885,
      "learning_rate": 1.04432246847525e-05,
      "loss": 0.2243,
      "step": 1185
    },
    {
      "epoch": 8.040557667934094,
      "grad_norm": 3.4718968868255615,
      "learning_rate": 1.0364116223027956e-05,
      "loss": 0.1996,
      "step": 1190
    },
    {
      "epoch": 8.074355724545839,
      "grad_norm": 3.3445370197296143,
      "learning_rate": 1.0284984937176213e-05,
      "loss": 0.2244,
      "step": 1195
    },
    {
      "epoch": 8.108153781157583,
      "grad_norm": 2.8722851276397705,
      "learning_rate": 1.0205835787433645e-05,
      "loss": 0.099,
      "step": 1200
    },
    {
      "epoch": 8.141951837769328,
      "grad_norm": 2.5152461528778076,
      "learning_rate": 1.0126673735156402e-05,
      "loss": 0.1599,
      "step": 1205
    },
    {
      "epoch": 8.175749894381074,
      "grad_norm": 3.2663590908050537,
      "learning_rate": 1.0047503742509405e-05,
      "loss": 0.2148,
      "step": 1210
    },
    {
      "epoch": 8.209547950992818,
      "grad_norm": 2.693246603012085,
      "learning_rate": 9.968330772155312e-06,
      "loss": 0.219,
      "step": 1215
    },
    {
      "epoch": 8.243346007604563,
      "grad_norm": 3.533890962600708,
      "learning_rate": 9.889159786943428e-06,
      "loss": 0.1133,
      "step": 1220
    },
    {
      "epoch": 8.277144064216307,
      "grad_norm": 2.7618963718414307,
      "learning_rate": 9.809995749598633e-06,
      "loss": 0.1692,
      "step": 1225
    },
    {
      "epoch": 8.310942120828052,
      "grad_norm": 2.682603120803833,
      "learning_rate": 9.730843622410282e-06,
      "loss": 0.2291,
      "step": 1230
    },
    {
      "epoch": 8.344740177439798,
      "grad_norm": 2.9029886722564697,
      "learning_rate": 9.651708366921152e-06,
      "loss": 0.165,
      "step": 1235
    },
    {
      "epoch": 8.378538234051542,
      "grad_norm": 2.932377576828003,
      "learning_rate": 9.572594943616457e-06,
      "loss": 0.1651,
      "step": 1240
    },
    {
      "epoch": 8.412336290663287,
      "grad_norm": 3.0703186988830566,
      "learning_rate": 9.493508311612874e-06,
      "loss": 0.1969,
      "step": 1245
    },
    {
      "epoch": 8.446134347275033,
      "grad_norm": 2.8268532752990723,
      "learning_rate": 9.414453428347715e-06,
      "loss": 0.1747,
      "step": 1250
    },
    {
      "epoch": 8.479932403886776,
      "grad_norm": 2.9563803672790527,
      "learning_rate": 9.335435249268165e-06,
      "loss": 0.1082,
      "step": 1255
    },
    {
      "epoch": 8.513730460498522,
      "grad_norm": 3.163346767425537,
      "learning_rate": 9.256458727520648e-06,
      "loss": 0.1776,
      "step": 1260
    },
    {
      "epoch": 8.547528517110266,
      "grad_norm": 3.5345945358276367,
      "learning_rate": 9.177528813640362e-06,
      "loss": 0.1194,
      "step": 1265
    },
    {
      "epoch": 8.581326573722011,
      "grad_norm": 3.074373722076416,
      "learning_rate": 9.098650455240959e-06,
      "loss": 0.197,
      "step": 1270
    },
    {
      "epoch": 8.615124630333757,
      "grad_norm": 3.080812454223633,
      "learning_rate": 9.019828596704394e-06,
      "loss": 0.1218,
      "step": 1275
    },
    {
      "epoch": 8.6489226869455,
      "grad_norm": 3.2213311195373535,
      "learning_rate": 8.941068178871021e-06,
      "loss": 0.1822,
      "step": 1280
    },
    {
      "epoch": 8.682720743557246,
      "grad_norm": 2.857954740524292,
      "learning_rate": 8.862374138729854e-06,
      "loss": 0.1687,
      "step": 1285
    },
    {
      "epoch": 8.71651880016899,
      "grad_norm": 2.9493982791900635,
      "learning_rate": 8.783751409109116e-06,
      "loss": 0.1393,
      "step": 1290
    },
    {
      "epoch": 8.750316856780735,
      "grad_norm": 1.754936695098877,
      "learning_rate": 8.705204918367032e-06,
      "loss": 0.1846,
      "step": 1295
    },
    {
      "epoch": 8.78411491339248,
      "grad_norm": 4.011746406555176,
      "learning_rate": 8.626739590082897e-06,
      "loss": 0.1897,
      "step": 1300
    },
    {
      "epoch": 8.817912970004224,
      "grad_norm": 3.003286361694336,
      "learning_rate": 8.54836034274844e-06,
      "loss": 0.1873,
      "step": 1305
    },
    {
      "epoch": 8.85171102661597,
      "grad_norm": 3.0416910648345947,
      "learning_rate": 8.47007208945953e-06,
      "loss": 0.1263,
      "step": 1310
    },
    {
      "epoch": 8.885509083227713,
      "grad_norm": 3.4020864963531494,
      "learning_rate": 8.391879737608202e-06,
      "loss": 0.1536,
      "step": 1315
    },
    {
      "epoch": 8.919307139839459,
      "grad_norm": 2.8439645767211914,
      "learning_rate": 8.313788188575032e-06,
      "loss": 0.1835,
      "step": 1320
    },
    {
      "epoch": 8.953105196451205,
      "grad_norm": 2.475952386856079,
      "learning_rate": 8.23580233742192e-06,
      "loss": 0.1275,
      "step": 1325
    },
    {
      "epoch": 8.986903253062948,
      "grad_norm": 3.099142551422119,
      "learning_rate": 8.15792707258522e-06,
      "loss": 0.1355,
      "step": 1330
    },
    {
      "epoch": 9.020278833967048,
      "grad_norm": 2.5242106914520264,
      "learning_rate": 8.08016727556936e-06,
      "loss": 0.1135,
      "step": 1335
    },
    {
      "epoch": 9.054076890578791,
      "grad_norm": 2.4750607013702393,
      "learning_rate": 8.002527820640809e-06,
      "loss": 0.1477,
      "step": 1340
    },
    {
      "epoch": 9.087874947190537,
      "grad_norm": 2.5990304946899414,
      "learning_rate": 7.925013574522556e-06,
      "loss": 0.1125,
      "step": 1345
    },
    {
      "epoch": 9.12167300380228,
      "grad_norm": 2.2538115978240967,
      "learning_rate": 7.847629396089054e-06,
      "loss": 0.1967,
      "step": 1350
    },
    {
      "epoch": 9.155471060414026,
      "grad_norm": 2.93662691116333,
      "learning_rate": 7.770380136061643e-06,
      "loss": 0.1963,
      "step": 1355
    },
    {
      "epoch": 9.189269117025772,
      "grad_norm": 3.2367334365844727,
      "learning_rate": 7.693270636704476e-06,
      "loss": 0.0882,
      "step": 1360
    },
    {
      "epoch": 9.223067173637515,
      "grad_norm": 2.297624349594116,
      "learning_rate": 7.616305731521009e-06,
      "loss": 0.1547,
      "step": 1365
    },
    {
      "epoch": 9.256865230249261,
      "grad_norm": 3.3643083572387695,
      "learning_rate": 7.539490244951013e-06,
      "loss": 0.1491,
      "step": 1370
    },
    {
      "epoch": 9.290663286861005,
      "grad_norm": 2.270787477493286,
      "learning_rate": 7.462828992068144e-06,
      "loss": 0.1255,
      "step": 1375
    },
    {
      "epoch": 9.32446134347275,
      "grad_norm": 2.6333799362182617,
      "learning_rate": 7.386326778278142e-06,
      "loss": 0.1117,
      "step": 1380
    },
    {
      "epoch": 9.358259400084496,
      "grad_norm": 2.613737106323242,
      "learning_rate": 7.3099883990176025e-06,
      "loss": 0.1612,
      "step": 1385
    },
    {
      "epoch": 9.39205745669624,
      "grad_norm": 1.7052559852600098,
      "learning_rate": 7.233818639453358e-06,
      "loss": 0.1471,
      "step": 1390
    },
    {
      "epoch": 9.425855513307985,
      "grad_norm": 3.2761054039001465,
      "learning_rate": 7.15782227418257e-06,
      "loss": 0.122,
      "step": 1395
    },
    {
      "epoch": 9.45965356991973,
      "grad_norm": 2.652831792831421,
      "learning_rate": 7.0820040669333975e-06,
      "loss": 0.1438,
      "step": 1400
    },
    {
      "epoch": 9.493451626531474,
      "grad_norm": 3.1051905155181885,
      "learning_rate": 7.006368770266421e-06,
      "loss": 0.1396,
      "step": 1405
    },
    {
      "epoch": 9.52724968314322,
      "grad_norm": 2.8987197875976562,
      "learning_rate": 6.930921125276715e-06,
      "loss": 0.1714,
      "step": 1410
    },
    {
      "epoch": 9.561047739754963,
      "grad_norm": 3.5985753536224365,
      "learning_rate": 6.855665861296662e-06,
      "loss": 0.1221,
      "step": 1415
    },
    {
      "epoch": 9.594845796366709,
      "grad_norm": 3.5496666431427,
      "learning_rate": 6.78060769559951e-06,
      "loss": 0.1261,
      "step": 1420
    },
    {
      "epoch": 9.628643852978454,
      "grad_norm": 2.57647442817688,
      "learning_rate": 6.705751333103676e-06,
      "loss": 0.132,
      "step": 1425
    },
    {
      "epoch": 9.662441909590198,
      "grad_norm": 2.8501367568969727,
      "learning_rate": 6.631101466077801e-06,
      "loss": 0.1463,
      "step": 1430
    },
    {
      "epoch": 9.696239966201944,
      "grad_norm": 2.449470043182373,
      "learning_rate": 6.556662773846658e-06,
      "loss": 0.1387,
      "step": 1435
    },
    {
      "epoch": 9.730038022813687,
      "grad_norm": 3.8504765033721924,
      "learning_rate": 6.48243992249781e-06,
      "loss": 0.1906,
      "step": 1440
    },
    {
      "epoch": 9.763836079425433,
      "grad_norm": 2.4857442378997803,
      "learning_rate": 6.40843756458913e-06,
      "loss": 0.1024,
      "step": 1445
    },
    {
      "epoch": 9.797634136037178,
      "grad_norm": 3.8078644275665283,
      "learning_rate": 6.3346603388571605e-06,
      "loss": 0.1211,
      "step": 1450
    },
    {
      "epoch": 9.831432192648922,
      "grad_norm": 2.8603129386901855,
      "learning_rate": 6.261112869926348e-06,
      "loss": 0.0645,
      "step": 1455
    },
    {
      "epoch": 9.865230249260668,
      "grad_norm": 2.669579267501831,
      "learning_rate": 6.187799768019134e-06,
      "loss": 0.194,
      "step": 1460
    },
    {
      "epoch": 9.899028305872413,
      "grad_norm": 2.163553237915039,
      "learning_rate": 6.114725628666997e-06,
      "loss": 0.1371,
      "step": 1465
    },
    {
      "epoch": 9.932826362484157,
      "grad_norm": 3.211575984954834,
      "learning_rate": 6.041895032422377e-06,
      "loss": 0.1427,
      "step": 1470
    },
    {
      "epoch": 9.966624419095902,
      "grad_norm": 3.1249096393585205,
      "learning_rate": 5.969312544571529e-06,
      "loss": 0.1482,
      "step": 1475
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.95405912399292,
      "learning_rate": 5.8969827148483935e-06,
      "loss": 0.1493,
      "step": 1480
    },
    {
      "epoch": 10.033798056611746,
      "grad_norm": 2.1418049335479736,
      "learning_rate": 5.824910077149372e-06,
      "loss": 0.1223,
      "step": 1485
    },
    {
      "epoch": 10.06759611322349,
      "grad_norm": 2.2330262660980225,
      "learning_rate": 5.753099149249133e-06,
      "loss": 0.1569,
      "step": 1490
    },
    {
      "epoch": 10.101394169835235,
      "grad_norm": 2.517437696456909,
      "learning_rate": 5.681554432517435e-06,
      "loss": 0.0826,
      "step": 1495
    },
    {
      "epoch": 10.135192226446978,
      "grad_norm": 2.317457675933838,
      "learning_rate": 5.610280411636941e-06,
      "loss": 0.1024,
      "step": 1500
    },
    {
      "epoch": 10.168990283058724,
      "grad_norm": 3.2839527130126953,
      "learning_rate": 5.539281554322126e-06,
      "loss": 0.1484,
      "step": 1505
    },
    {
      "epoch": 10.20278833967047,
      "grad_norm": 3.0793209075927734,
      "learning_rate": 5.468562311039205e-06,
      "loss": 0.1529,
      "step": 1510
    },
    {
      "epoch": 10.236586396282213,
      "grad_norm": 2.524780035018921,
      "learning_rate": 5.3981271147271786e-06,
      "loss": 0.09,
      "step": 1515
    },
    {
      "epoch": 10.270384452893959,
      "grad_norm": 2.0456202030181885,
      "learning_rate": 5.327980380519942e-06,
      "loss": 0.1159,
      "step": 1520
    },
    {
      "epoch": 10.304182509505704,
      "grad_norm": 2.448542356491089,
      "learning_rate": 5.25812650546955e-06,
      "loss": 0.1431,
      "step": 1525
    },
    {
      "epoch": 10.337980566117448,
      "grad_norm": 1.669090986251831,
      "learning_rate": 5.188569868270566e-06,
      "loss": 0.1234,
      "step": 1530
    },
    {
      "epoch": 10.371778622729193,
      "grad_norm": 3.0153934955596924,
      "learning_rate": 5.11931482898562e-06,
      "loss": 0.1086,
      "step": 1535
    },
    {
      "epoch": 10.405576679340937,
      "grad_norm": 3.3632757663726807,
      "learning_rate": 5.050365728772084e-06,
      "loss": 0.1114,
      "step": 1540
    },
    {
      "epoch": 10.439374735952683,
      "grad_norm": 2.883791208267212,
      "learning_rate": 4.981726889609952e-06,
      "loss": 0.1465,
      "step": 1545
    },
    {
      "epoch": 10.473172792564428,
      "grad_norm": 1.6629996299743652,
      "learning_rate": 4.913402614030944e-06,
      "loss": 0.0823,
      "step": 1550
    },
    {
      "epoch": 10.506970849176172,
      "grad_norm": 2.789846658706665,
      "learning_rate": 4.84539718484877e-06,
      "loss": 0.133,
      "step": 1555
    },
    {
      "epoch": 10.540768905787917,
      "grad_norm": 2.095916509628296,
      "learning_rate": 4.77771486489071e-06,
      "loss": 0.0988,
      "step": 1560
    },
    {
      "epoch": 10.574566962399661,
      "grad_norm": 2.670482635498047,
      "learning_rate": 4.710359896730379e-06,
      "loss": 0.1166,
      "step": 1565
    },
    {
      "epoch": 10.608365019011407,
      "grad_norm": 1.432079553604126,
      "learning_rate": 4.643336502421783e-06,
      "loss": 0.1624,
      "step": 1570
    },
    {
      "epoch": 10.642163075623152,
      "grad_norm": 2.3370885848999023,
      "learning_rate": 4.576648883234686e-06,
      "loss": 0.1007,
      "step": 1575
    },
    {
      "epoch": 10.675961132234896,
      "grad_norm": 3.077364921569824,
      "learning_rate": 4.510301219391245e-06,
      "loss": 0.095,
      "step": 1580
    },
    {
      "epoch": 10.709759188846641,
      "grad_norm": 3.081515312194824,
      "learning_rate": 4.444297669803981e-06,
      "loss": 0.1086,
      "step": 1585
    },
    {
      "epoch": 10.743557245458387,
      "grad_norm": 3.574352502822876,
      "learning_rate": 4.378642371815078e-06,
      "loss": 0.1501,
      "step": 1590
    },
    {
      "epoch": 10.77735530207013,
      "grad_norm": 2.738147735595703,
      "learning_rate": 4.313339440937055e-06,
      "loss": 0.1719,
      "step": 1595
    },
    {
      "epoch": 10.811153358681876,
      "grad_norm": 2.235377073287964,
      "learning_rate": 4.248392970594774e-06,
      "loss": 0.1176,
      "step": 1600
    },
    {
      "epoch": 10.84495141529362,
      "grad_norm": 2.95943021774292,
      "learning_rate": 4.18380703186886e-06,
      "loss": 0.1334,
      "step": 1605
    },
    {
      "epoch": 10.878749471905365,
      "grad_norm": 1.9108000993728638,
      "learning_rate": 4.1195856732405094e-06,
      "loss": 0.113,
      "step": 1610
    },
    {
      "epoch": 10.912547528517111,
      "grad_norm": 2.856457233428955,
      "learning_rate": 4.055732920337699e-06,
      "loss": 0.1027,
      "step": 1615
    },
    {
      "epoch": 10.946345585128855,
      "grad_norm": 2.5498857498168945,
      "learning_rate": 3.992252775682877e-06,
      "loss": 0.0869,
      "step": 1620
    },
    {
      "epoch": 10.9801436417406,
      "grad_norm": 2.5696861743927,
      "learning_rate": 3.929149218442052e-06,
      "loss": 0.1553,
      "step": 1625
    },
    {
      "epoch": 11.013519222644698,
      "grad_norm": 1.5783302783966064,
      "learning_rate": 3.866426204175353e-06,
      "loss": 0.1055,
      "step": 1630
    },
    {
      "epoch": 11.047317279256443,
      "grad_norm": 2.1971595287323,
      "learning_rate": 3.804087664589108e-06,
      "loss": 0.1169,
      "step": 1635
    },
    {
      "epoch": 11.081115335868187,
      "grad_norm": 2.1792209148406982,
      "learning_rate": 3.742137507289363e-06,
      "loss": 0.1408,
      "step": 1640
    },
    {
      "epoch": 11.114913392479933,
      "grad_norm": 2.117349147796631,
      "learning_rate": 3.680579615536961e-06,
      "loss": 0.0973,
      "step": 1645
    },
    {
      "epoch": 11.148711449091678,
      "grad_norm": 2.348695755004883,
      "learning_rate": 3.6194178480041174e-06,
      "loss": 0.0879,
      "step": 1650
    },
    {
      "epoch": 11.182509505703422,
      "grad_norm": 2.529822826385498,
      "learning_rate": 3.558656038532532e-06,
      "loss": 0.1049,
      "step": 1655
    },
    {
      "epoch": 11.216307562315167,
      "grad_norm": 1.6536489725112915,
      "learning_rate": 3.4982979958930896e-06,
      "loss": 0.0713,
      "step": 1660
    },
    {
      "epoch": 11.250105618926911,
      "grad_norm": 3.6709718704223633,
      "learning_rate": 3.4383475035471026e-06,
      "loss": 0.0843,
      "step": 1665
    },
    {
      "epoch": 11.283903675538657,
      "grad_norm": 2.0067543983459473,
      "learning_rate": 3.378808319409149e-06,
      "loss": 0.1148,
      "step": 1670
    },
    {
      "epoch": 11.317701732150402,
      "grad_norm": 2.263753890991211,
      "learning_rate": 3.319684175611517e-06,
      "loss": 0.1042,
      "step": 1675
    },
    {
      "epoch": 11.351499788762146,
      "grad_norm": 2.691466808319092,
      "learning_rate": 3.2609787782702595e-06,
      "loss": 0.0902,
      "step": 1680
    },
    {
      "epoch": 11.385297845373891,
      "grad_norm": 2.7062034606933594,
      "learning_rate": 3.2026958072528715e-06,
      "loss": 0.0978,
      "step": 1685
    },
    {
      "epoch": 11.419095901985635,
      "grad_norm": 2.082036256790161,
      "learning_rate": 3.1448389159476433e-06,
      "loss": 0.1192,
      "step": 1690
    },
    {
      "epoch": 11.45289395859738,
      "grad_norm": 1.7839562892913818,
      "learning_rate": 3.087411731034641e-06,
      "loss": 0.1098,
      "step": 1695
    },
    {
      "epoch": 11.486692015209126,
      "grad_norm": 2.078550100326538,
      "learning_rate": 3.0304178522583626e-06,
      "loss": 0.0822,
      "step": 1700
    },
    {
      "epoch": 11.52049007182087,
      "grad_norm": 1.636839747428894,
      "learning_rate": 2.973860852202117e-06,
      "loss": 0.0987,
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 11.554288128432615, | |
| "grad_norm": 2.2059497833251953, | |
| "learning_rate": 2.917744276064056e-06, | |
| "loss": 0.1176, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 11.58808618504436, | |
| "grad_norm": 2.563145637512207, | |
| "learning_rate": 2.8620716414349714e-06, | |
| "loss": 0.1158, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 11.621884241656105, | |
| "grad_norm": 2.6411447525024414, | |
| "learning_rate": 2.806846438077787e-06, | |
| "loss": 0.1471, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 11.65568229826785, | |
| "grad_norm": 1.4738620519638062, | |
| "learning_rate": 2.7520721277088023e-06, | |
| "loss": 0.1833, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 11.689480354879594, | |
| "grad_norm": 1.8081343173980713, | |
| "learning_rate": 2.697752143780713e-06, | |
| "loss": 0.1188, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 11.72327841149134, | |
| "grad_norm": 1.6308510303497314, | |
| "learning_rate": 2.643889891267386e-06, | |
| "loss": 0.0962, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 11.757076468103085, | |
| "grad_norm": 2.0612504482269287, | |
| "learning_rate": 2.5904887464504115e-06, | |
| "loss": 0.0656, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 11.790874524714829, | |
| "grad_norm": 2.3865137100219727, | |
| "learning_rate": 2.537552056707483e-06, | |
| "loss": 0.1124, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 11.824672581326574, | |
| "grad_norm": 2.3273239135742188, | |
| "learning_rate": 2.4850831403025597e-06, | |
| "loss": 0.0682, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 11.858470637938318, | |
| "grad_norm": 2.371812105178833, | |
| "learning_rate": 2.433085286177872e-06, | |
| "loss": 0.0906, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 11.892268694550063, | |
| "grad_norm": 4.104214191436768, | |
| "learning_rate": 2.381561753747753e-06, | |
| "loss": 0.1273, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 11.926066751161809, | |
| "grad_norm": 2.1697592735290527, | |
| "learning_rate": 2.330515772694333e-06, | |
| "loss": 0.1251, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 11.959864807773553, | |
| "grad_norm": 3.299699068069458, | |
| "learning_rate": 2.279950542765078e-06, | |
| "loss": 0.0756, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 11.993662864385298, | |
| "grad_norm": 3.481651544570923, | |
| "learning_rate": 2.2298692335722403e-06, | |
| "loss": 0.1518, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 12.027038445289396, | |
| "grad_norm": 1.655312418937683, | |
| "learning_rate": 2.1802749843941583e-06, | |
| "loss": 0.084, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 12.060836501901141, | |
| "grad_norm": 1.347791075706482, | |
| "learning_rate": 2.1311709039784734e-06, | |
| "loss": 0.0561, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 12.094634558512885, | |
| "grad_norm": 1.9169631004333496, | |
| "learning_rate": 2.0825600703472814e-06, | |
| "loss": 0.1018, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 12.12843261512463, | |
| "grad_norm": 1.5145456790924072, | |
| "learning_rate": 2.0344455306041633e-06, | |
| "loss": 0.1338, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 12.162230671736376, | |
| "grad_norm": 1.8022133111953735, | |
| "learning_rate": 1.98683030074321e-06, | |
| "loss": 0.1331, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 12.19602872834812, | |
| "grad_norm": 2.502906084060669, | |
| "learning_rate": 1.939717365459952e-06, | |
| "loss": 0.0758, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 12.229826784959865, | |
| "grad_norm": 2.3254261016845703, | |
| "learning_rate": 1.8931096779642644e-06, | |
| "loss": 0.1571, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 12.263624841571609, | |
| "grad_norm": 1.831810474395752, | |
| "learning_rate": 1.847010159795265e-06, | |
| "loss": 0.1052, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 12.297422898183354, | |
| "grad_norm": 2.395282745361328, | |
| "learning_rate": 1.8014217006381728e-06, | |
| "loss": 0.057, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 12.3312209547951, | |
| "grad_norm": 2.0812599658966064, | |
| "learning_rate": 1.7563471581431623e-06, | |
| "loss": 0.0743, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 12.365019011406844, | |
| "grad_norm": 1.5092777013778687, | |
| "learning_rate": 1.7117893577462541e-06, | |
| "loss": 0.0733, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 12.39881706801859, | |
| "grad_norm": 2.1698033809661865, | |
| "learning_rate": 1.6677510924921958e-06, | |
| "loss": 0.099, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 12.432615124630333, | |
| "grad_norm": 2.5433080196380615, | |
| "learning_rate": 1.6242351228593833e-06, | |
| "loss": 0.0944, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 12.466413181242078, | |
| "grad_norm": 2.1289961338043213, | |
| "learning_rate": 1.5812441765868292e-06, | |
| "loss": 0.0881, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 12.500211237853824, | |
| "grad_norm": 1.7505559921264648, | |
| "learning_rate": 1.5387809485031745e-06, | |
| "loss": 0.065, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 12.534009294465568, | |
| "grad_norm": 2.4067695140838623, | |
| "learning_rate": 1.4968481003577628e-06, | |
| "loss": 0.0476, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 12.567807351077313, | |
| "grad_norm": 2.417130708694458, | |
| "learning_rate": 1.4554482606538044e-06, | |
| "loss": 0.1166, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 12.601605407689059, | |
| "grad_norm": 1.8488808870315552, | |
| "learning_rate": 1.4145840244835985e-06, | |
| "loss": 0.1015, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 12.635403464300802, | |
| "grad_norm": 2.4164412021636963, | |
| "learning_rate": 1.3742579533658729e-06, | |
| "loss": 0.0822, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 12.669201520912548, | |
| "grad_norm": 2.083436965942383, | |
| "learning_rate": 1.3344725750852183e-06, | |
| "loss": 0.1192, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 12.702999577524292, | |
| "grad_norm": 1.8657339811325073, | |
| "learning_rate": 1.2952303835336256e-06, | |
| "loss": 0.1488, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 12.736797634136037, | |
| "grad_norm": 2.0696699619293213, | |
| "learning_rate": 1.2565338385541792e-06, | |
| "loss": 0.0752, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 12.770595690747783, | |
| "grad_norm": 1.6756703853607178, | |
| "learning_rate": 1.2183853657868504e-06, | |
| "loss": 0.107, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 12.804393747359526, | |
| "grad_norm": 2.409106731414795, | |
| "learning_rate": 1.1807873565164507e-06, | |
| "loss": 0.0669, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 12.838191803971272, | |
| "grad_norm": 1.7135124206542969, | |
| "learning_rate": 1.1437421675227457e-06, | |
| "loss": 0.1809, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 12.871989860583017, | |
| "grad_norm": 1.9362844228744507, | |
| "learning_rate": 1.107252120932717e-06, | |
| "loss": 0.1153, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 12.905787917194761, | |
| "grad_norm": 1.499002456665039, | |
| "learning_rate": 1.0713195040750012e-06, | |
| "loss": 0.1103, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 12.939585973806507, | |
| "grad_norm": 2.998647689819336, | |
| "learning_rate": 1.035946569336519e-06, | |
| "loss": 0.089, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 12.97338403041825, | |
| "grad_norm": 2.5154922008514404, | |
| "learning_rate": 1.0011355340212802e-06, | |
| "loss": 0.1253, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 13.00675961132235, | |
| "grad_norm": 2.583174467086792, | |
| "learning_rate": 9.668885802114002e-07, | |
| "loss": 0.0991, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 13.040557667934094, | |
| "grad_norm": 2.039463758468628, | |
| "learning_rate": 9.33207854630317e-07, | |
| "loss": 0.063, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 13.074355724545839, | |
| "grad_norm": 2.5090322494506836, | |
| "learning_rate": 9.000954685082286e-07, | |
| "loss": 0.0839, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 13.108153781157583, | |
| "grad_norm": 1.2274693250656128, | |
| "learning_rate": 8.675534974497435e-07, | |
| "loss": 0.0393, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 13.141951837769328, | |
| "grad_norm": 1.7376823425292969, | |
| "learning_rate": 8.355839813037936e-07, | |
| "loss": 0.0899, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 13.175749894381074, | |
| "grad_norm": 1.657383918762207, | |
| "learning_rate": 8.041889240357493e-07, | |
| "loss": 0.0883, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 13.209547950992818, | |
| "grad_norm": 2.177140235900879, | |
| "learning_rate": 7.733702936018162e-07, | |
| "loss": 0.0703, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 13.243346007604563, | |
| "grad_norm": 1.9505295753479004, | |
| "learning_rate": 7.431300218256754e-07, | |
| "loss": 0.0734, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 13.277144064216307, | |
| "grad_norm": 1.5833914279937744, | |
| "learning_rate": 7.13470004277379e-07, | |
| "loss": 0.0464, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 13.310942120828052, | |
| "grad_norm": 2.343639850616455, | |
| "learning_rate": 6.843921001545429e-07, | |
| "loss": 0.0652, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 13.344740177439798, | |
| "grad_norm": 2.3044393062591553, | |
| "learning_rate": 6.558981321658009e-07, | |
| "loss": 0.1476, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 13.378538234051542, | |
| "grad_norm": 1.9348516464233398, | |
| "learning_rate": 6.279898864165423e-07, | |
| "loss": 0.0909, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 13.412336290663287, | |
| "grad_norm": 2.0757150650024414, | |
| "learning_rate": 6.006691122969644e-07, | |
| "loss": 0.1158, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 13.446134347275033, | |
| "grad_norm": 1.7569150924682617, | |
| "learning_rate": 5.739375223724108e-07, | |
| "loss": 0.0857, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 13.479932403886776, | |
| "grad_norm": 2.729357957839966, | |
| "learning_rate": 5.477967922760141e-07, | |
| "loss": 0.1197, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 13.513730460498522, | |
| "grad_norm": 1.9830466508865356, | |
| "learning_rate": 5.222485606036709e-07, | |
| "loss": 0.0667, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 13.547528517110266, | |
| "grad_norm": 2.4005234241485596, | |
| "learning_rate": 4.972944288113268e-07, | |
| "loss": 0.1217, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 13.581326573722011, | |
| "grad_norm": 2.361483335494995, | |
| "learning_rate": 4.729359611145845e-07, | |
| "loss": 0.11, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 13.615124630333757, | |
| "grad_norm": 1.8319944143295288, | |
| "learning_rate": 4.49174684390663e-07, | |
| "loss": 0.0716, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 13.6489226869455, | |
| "grad_norm": 1.7428772449493408, | |
| "learning_rate": 4.260120880826768e-07, | |
| "loss": 0.1552, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 13.682720743557246, | |
| "grad_norm": 2.0641117095947266, | |
| "learning_rate": 4.034496241062824e-07, | |
| "loss": 0.1185, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 13.71651880016899, | |
| "grad_norm": 2.4367544651031494, | |
| "learning_rate": 3.8148870675866145e-07, | |
| "loss": 0.1445, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 13.750316856780735, | |
| "grad_norm": 1.3618732690811157, | |
| "learning_rate": 3.601307126298648e-07, | |
| "loss": 0.0579, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 13.78411491339248, | |
| "grad_norm": 1.515920877456665, | |
| "learning_rate": 3.3937698051653034e-07, | |
| "loss": 0.0543, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 13.817912970004224, | |
| "grad_norm": 2.7224481105804443, | |
| "learning_rate": 3.1922881133795827e-07, | |
| "loss": 0.0955, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 13.85171102661597, | |
| "grad_norm": 2.9664154052734375, | |
| "learning_rate": 2.996874680545603e-07, | |
| "loss": 0.1153, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 13.885509083227713, | |
| "grad_norm": 2.224506139755249, | |
| "learning_rate": 2.8075417558870333e-07, | |
| "loss": 0.1311, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 13.919307139839459, | |
| "grad_norm": 1.8828747272491455, | |
| "learning_rate": 2.624301207479185e-07, | |
| "loss": 0.1198, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 13.953105196451205, | |
| "grad_norm": 2.1472909450531006, | |
| "learning_rate": 2.447164521505074e-07, | |
| "loss": 0.0764, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 13.986903253062948, | |
| "grad_norm": 2.1299331188201904, | |
| "learning_rate": 2.276142801535486e-07, | |
| "loss": 0.1228, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 14.020278833967048, | |
| "grad_norm": 2.9883980751037598, | |
| "learning_rate": 2.1112467678329197e-07, | |
| "loss": 0.1373, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 14.054076890578791, | |
| "grad_norm": 2.33616304397583, | |
| "learning_rate": 1.9524867566795945e-07, | |
| "loss": 0.0636, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 14.087874947190537, | |
| "grad_norm": 1.3360828161239624, | |
| "learning_rate": 1.7998727197295785e-07, | |
| "loss": 0.0624, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 14.12167300380228, | |
| "grad_norm": 1.6282477378845215, | |
| "learning_rate": 1.6534142233849527e-07, | |
| "loss": 0.0585, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 14.155471060414026, | |
| "grad_norm": 1.8694970607757568, | |
| "learning_rate": 1.5131204481961592e-07, | |
| "loss": 0.0747, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 14.189269117025772, | |
| "grad_norm": 1.441935420036316, | |
| "learning_rate": 1.3790001882865056e-07, | |
| "loss": 0.0718, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 14.223067173637515, | |
| "grad_norm": 2.2334372997283936, | |
| "learning_rate": 1.251061850800961e-07, | |
| "loss": 0.0947, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 14.256865230249261, | |
| "grad_norm": 2.6375110149383545, | |
| "learning_rate": 1.1293134553791551e-07, | |
| "loss": 0.1348, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 14.290663286861005, | |
| "grad_norm": 2.5394539833068848, | |
| "learning_rate": 1.0137626336526596e-07, | |
| "loss": 0.1289, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 14.32446134347275, | |
| "grad_norm": 1.4307893514633179, | |
| "learning_rate": 9.044166287666134e-08, | |
| "loss": 0.0499, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 14.358259400084496, | |
| "grad_norm": 1.6300568580627441, | |
| "learning_rate": 8.012822949256981e-08, | |
| "loss": 0.1074, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 14.39205745669624, | |
| "grad_norm": 1.95559823513031, | |
| "learning_rate": 7.043660969645261e-08, | |
| "loss": 0.0932, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 14.425855513307985, | |
| "grad_norm": 2.011991024017334, | |
| "learning_rate": 6.136741099423416e-08, | |
| "loss": 0.1045, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 14.45965356991973, | |
| "grad_norm": 2.210416078567505, | |
| "learning_rate": 5.2921201876223737e-08, | |
| "loss": 0.1052, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 14.493451626531474, | |
| "grad_norm": 2.3784687519073486, | |
| "learning_rate": 4.5098511781485056e-08, | |
| "loss": 0.0797, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 14.52724968314322, | |
| "grad_norm": 2.040710687637329, | |
| "learning_rate": 3.789983106464057e-08, | |
| "loss": 0.121, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 14.561047739754963, | |
| "grad_norm": 1.7879664897918701, | |
| "learning_rate": 3.132561096514164e-08, | |
| "loss": 0.0773, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 14.594845796366709, | |
| "grad_norm": 1.8894425630569458, | |
| "learning_rate": 2.5376263578977823e-08, | |
| "loss": 0.0965, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 14.628643852978454, | |
| "grad_norm": 1.9897544384002686, | |
| "learning_rate": 2.0052161832850858e-08, | |
| "loss": 0.078, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 14.662441909590198, | |
| "grad_norm": 1.689963698387146, | |
| "learning_rate": 1.5353639460793378e-08, | |
| "loss": 0.1024, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 14.696239966201944, | |
| "grad_norm": 1.8147411346435547, | |
| "learning_rate": 1.1280990983248975e-08, | |
| "loss": 0.1124, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 14.730038022813687, | |
| "grad_norm": 2.499868392944336, | |
| "learning_rate": 7.834471688616952e-09, | |
| "loss": 0.1116, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 14.763836079425433, | |
| "grad_norm": 1.9060348272323608, | |
| "learning_rate": 5.014297617242925e-09, | |
| "loss": 0.0975, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 14.797634136037178, | |
| "grad_norm": 2.312739849090576, | |
| "learning_rate": 2.8206455478774206e-09, | |
| "loss": 0.101, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 14.831432192648922, | |
| "grad_norm": 1.106972098350525, | |
| "learning_rate": 1.2536529866014058e-09, | |
| "loss": 0.0569, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 14.865230249260668, | |
| "grad_norm": 1.9655669927597046, | |
| "learning_rate": 3.1341815819763146e-10, | |
| "loss": 0.1144, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 14.899028305872413, | |
| "grad_norm": 2.5896451473236084, | |
| "learning_rate": 0.0, | |
| "loss": 0.0896, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 14.899028305872413, | |
| "step": 2205, | |
| "total_flos": 3.8395126284519014e+17, | |
| "train_loss": 0.4484830284334905, | |
| "train_runtime": 10885.404, | |
| "train_samples_per_second": 3.262, | |
| "train_steps_per_second": 0.203 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2205, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.8395126284519014e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
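
The file above is the `trainer_state.json` that Hugging Face `Trainer` writes alongside checkpoints: `log_history` holds one record per logging step (every 5 steps here, per `logging_steps`), and its final entry repeats run-wide totals such as `train_loss` and `train_runtime`. Below is a minimal sketch for inspecting such a file with only the standard library; the path `trainer_state.json` is an assumption (it is the default filename in the checkpoint/output directory), and the script relies only on keys visible in the file above.

```python
import json

# A minimal sketch for inspecting a trainer_state.json like the one above.
# The path is an assumption: Hugging Face Trainer writes this file into the
# checkpoint/output directory by default.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training records live in "log_history". The closing summary entry
# carries "train_loss"/"train_runtime" but no "loss" key, so filter on "loss".
records = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]

print(f"logged {len(records)} loss points over steps {steps[0]}..{steps[-1]}")
print(f"first loss: {losses[0]:.4f}, last loss: {losses[-1]:.4f}")
print(f"min loss:   {min(losses):.4f} at step {steps[losses.index(min(losses))]}")

# The last log_history element is the run summary: train_loss is the mean
# loss over all steps, train_runtime is wall-clock seconds.
summary = state["log_history"][-1]
print(f"mean train_loss: {summary['train_loss']:.4f}, "
      f"runtime: {summary['train_runtime'] / 3600:.2f} h")
```

For this particular file the script would report 441 loss points over steps 5..2205, a final per-step loss of 0.0896, a mean `train_loss` of about 0.4485, and a runtime of roughly 3.02 hours (10885.404 s), matching the summary entry at the end of `log_history`.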