Transformers
Safetensors
reasoning
logic
logic-inertia
qwen2
dpo
multi-step-inference
Fusion-Conflict-8B / trainer_state.json
qbao775's picture
Upload folder using huggingface_hub
9e45405 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 0.766525149345398,
"learning_rate": 7.6e-07,
"loss": -0.0699,
"step": 20
},
{
"epoch": 0.02,
"grad_norm": 1.2742258310317993,
"learning_rate": 1.56e-06,
"loss": -0.0817,
"step": 40
},
{
"epoch": 0.03,
"grad_norm": 3.1112349033355713,
"learning_rate": 2.3600000000000003e-06,
"loss": -0.0788,
"step": 60
},
{
"epoch": 0.04,
"grad_norm": 39.28089141845703,
"learning_rate": 3.1600000000000002e-06,
"loss": -0.1878,
"step": 80
},
{
"epoch": 0.05,
"grad_norm": 1.4309356212615967,
"learning_rate": 3.96e-06,
"loss": -0.0402,
"step": 100
},
{
"epoch": 0.06,
"grad_norm": 8.633565902709961,
"learning_rate": 4.76e-06,
"loss": -0.0801,
"step": 120
},
{
"epoch": 0.07,
"grad_norm": 2.7842857837677,
"learning_rate": 5.560000000000001e-06,
"loss": -0.098,
"step": 140
},
{
"epoch": 0.08,
"grad_norm": 0.09758752584457397,
"learning_rate": 6.360000000000001e-06,
"loss": -0.1227,
"step": 160
},
{
"epoch": 0.09,
"grad_norm": 4.8647260665893555,
"learning_rate": 7.16e-06,
"loss": -0.0446,
"step": 180
},
{
"epoch": 0.1,
"grad_norm": 4.039462089538574,
"learning_rate": 7.960000000000002e-06,
"loss": -0.1432,
"step": 200
},
{
"epoch": 0.11,
"grad_norm": 0.05541618913412094,
"learning_rate": 8.76e-06,
"loss": -0.1669,
"step": 220
},
{
"epoch": 0.12,
"grad_norm": 32.180450439453125,
"learning_rate": 9.56e-06,
"loss": -0.072,
"step": 240
},
{
"epoch": 0.13,
"grad_norm": 0.8857631683349609,
"learning_rate": 1.036e-05,
"loss": -0.0392,
"step": 260
},
{
"epoch": 0.14,
"grad_norm": 0.09615501761436462,
"learning_rate": 1.1160000000000002e-05,
"loss": -0.035,
"step": 280
},
{
"epoch": 0.15,
"grad_norm": 4.058862209320068,
"learning_rate": 1.196e-05,
"loss": -0.0466,
"step": 300
},
{
"epoch": 0.16,
"grad_norm": 0.292805939912796,
"learning_rate": 1.2760000000000001e-05,
"loss": -0.0186,
"step": 320
},
{
"epoch": 0.17,
"grad_norm": 0.002572560915723443,
"learning_rate": 1.3560000000000002e-05,
"loss": -0.0461,
"step": 340
},
{
"epoch": 0.18,
"grad_norm": 0.032721325755119324,
"learning_rate": 1.4360000000000001e-05,
"loss": -0.0244,
"step": 360
},
{
"epoch": 0.19,
"grad_norm": 0.014479526318609715,
"learning_rate": 1.516e-05,
"loss": -0.0315,
"step": 380
},
{
"epoch": 0.2,
"grad_norm": 0.0007994524785317481,
"learning_rate": 1.5960000000000003e-05,
"loss": -0.0677,
"step": 400
},
{
"epoch": 0.21,
"grad_norm": 0.0004835372092202306,
"learning_rate": 1.6760000000000002e-05,
"loss": 0.0,
"step": 420
},
{
"epoch": 0.22,
"grad_norm": 1.9216458895243704e-05,
"learning_rate": 1.756e-05,
"loss": -0.0365,
"step": 440
},
{
"epoch": 0.23,
"grad_norm": 1.3274141565489117e-05,
"learning_rate": 1.8360000000000004e-05,
"loss": 0.0,
"step": 460
},
{
"epoch": 0.24,
"grad_norm": 1.9133960904582636e-06,
"learning_rate": 1.916e-05,
"loss": 0.0,
"step": 480
},
{
"epoch": 0.25,
"grad_norm": 0.000470530241727829,
"learning_rate": 1.9960000000000002e-05,
"loss": -0.019,
"step": 500
},
{
"epoch": 0.26,
"grad_norm": 2.208295882155653e-06,
"learning_rate": 1.999980260856137e-05,
"loss": -0.0024,
"step": 520
},
{
"epoch": 0.27,
"grad_norm": 0.001379099558107555,
"learning_rate": 1.999916834015426e-05,
"loss": -0.0135,
"step": 540
},
{
"epoch": 0.28,
"grad_norm": 0.0030259762424975634,
"learning_rate": 1.9998096675731135e-05,
"loss": -0.001,
"step": 560
},
{
"epoch": 0.29,
"grad_norm": 0.0003752721822820604,
"learning_rate": 1.9996587662170075e-05,
"loss": -0.0611,
"step": 580
},
{
"epoch": 0.3,
"grad_norm": 0.0004474676970858127,
"learning_rate": 1.9994641365480218e-05,
"loss": 0.0,
"step": 600
},
{
"epoch": 0.31,
"grad_norm": 0.0006332076736725867,
"learning_rate": 1.999225787079888e-05,
"loss": 0.0,
"step": 620
},
{
"epoch": 0.32,
"grad_norm": 0.00026314359274692833,
"learning_rate": 1.9989437282387855e-05,
"loss": -0.0392,
"step": 640
},
{
"epoch": 0.33,
"grad_norm": 0.008587045595049858,
"learning_rate": 1.9986179723628806e-05,
"loss": -0.0055,
"step": 660
},
{
"epoch": 0.34,
"grad_norm": 9.715045598568395e-05,
"learning_rate": 1.998248533701791e-05,
"loss": 0.0,
"step": 680
},
{
"epoch": 0.35,
"grad_norm": 0.0002863932168111205,
"learning_rate": 1.9978354284159604e-05,
"loss": -0.0022,
"step": 700
},
{
"epoch": 0.36,
"grad_norm": 1.0512692824704573e-05,
"learning_rate": 1.9973786745759523e-05,
"loss": 0.0,
"step": 720
},
{
"epoch": 0.37,
"grad_norm": 51.87382507324219,
"learning_rate": 1.9968782921616595e-05,
"loss": -0.0265,
"step": 740
},
{
"epoch": 0.38,
"grad_norm": 0.0003187332767993212,
"learning_rate": 1.99633430306143e-05,
"loss": -0.0183,
"step": 760
},
{
"epoch": 0.39,
"grad_norm": 0.00012655163300223649,
"learning_rate": 1.995746731071109e-05,
"loss": 0.0,
"step": 780
},
{
"epoch": 0.4,
"grad_norm": 1.217572389577981e-06,
"learning_rate": 1.9951156018929986e-05,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.41,
"grad_norm": 2.5256804292439483e-07,
"learning_rate": 1.994440943134734e-05,
"loss": 0.0,
"step": 820
},
{
"epoch": 0.42,
"grad_norm": 4.53083657703246e-06,
"learning_rate": 1.9937227843080746e-05,
"loss": 0.0,
"step": 840
},
{
"epoch": 0.43,
"grad_norm": 8.866226153259049e-08,
"learning_rate": 1.9929611568276146e-05,
"loss": 0.0,
"step": 860
},
{
"epoch": 0.44,
"grad_norm": 7.812383984173721e-08,
"learning_rate": 1.9921560940094068e-05,
"loss": 0.0,
"step": 880
},
{
"epoch": 0.45,
"grad_norm": 1.4238279444356294e-09,
"learning_rate": 1.9913076310695068e-05,
"loss": 0.0,
"step": 900
},
{
"epoch": 0.46,
"grad_norm": 1.6094839350166978e-10,
"learning_rate": 1.9904158051224324e-05,
"loss": 0.0,
"step": 920
},
{
"epoch": 0.47,
"grad_norm": 0.0,
"learning_rate": 1.98948065517954e-05,
"loss": 0.0,
"step": 940
},
{
"epoch": 0.48,
"grad_norm": 0.0,
"learning_rate": 1.988502222147317e-05,
"loss": 0.0,
"step": 960
},
{
"epoch": 0.49,
"grad_norm": 0.0,
"learning_rate": 1.9874805488255942e-05,
"loss": 0.0,
"step": 980
},
{
"epoch": 0.5,
"grad_norm": 0.0,
"learning_rate": 1.9864156799056725e-05,
"loss": 0.0,
"step": 1000
},
{
"epoch": 0.51,
"grad_norm": 0.0,
"learning_rate": 1.985307661968368e-05,
"loss": 0.0,
"step": 1020
},
{
"epoch": 0.52,
"grad_norm": 0.0,
"learning_rate": 1.9841565434819746e-05,
"loss": 0.0,
"step": 1040
},
{
"epoch": 0.53,
"grad_norm": 0.0,
"learning_rate": 1.9829623748001447e-05,
"loss": 0.0,
"step": 1060
},
{
"epoch": 0.54,
"grad_norm": 0.00012120811879867688,
"learning_rate": 1.981725208159684e-05,
"loss": -0.0181,
"step": 1080
},
{
"epoch": 0.55,
"grad_norm": 7.75166608946165e-06,
"learning_rate": 1.98044509767827e-05,
"loss": 0.0,
"step": 1100
},
{
"epoch": 0.56,
"grad_norm": 9.087637408811133e-06,
"learning_rate": 1.979122099352082e-05,
"loss": 0.0,
"step": 1120
},
{
"epoch": 0.57,
"grad_norm": 8.575042556913104e-06,
"learning_rate": 1.9777562710533528e-05,
"loss": 0.0,
"step": 1140
},
{
"epoch": 0.58,
"grad_norm": 4.427300154929981e-06,
"learning_rate": 1.9763476725278364e-05,
"loss": 0.0,
"step": 1160
},
{
"epoch": 0.59,
"grad_norm": 2.748534289054305e-09,
"learning_rate": 1.9748963653921957e-05,
"loss": 0.0,
"step": 1180
},
{
"epoch": 0.6,
"grad_norm": 2.178993327106582e-06,
"learning_rate": 1.9734024131313068e-05,
"loss": 0.0,
"step": 1200
},
{
"epoch": 0.61,
"grad_norm": 0.0376846008002758,
"learning_rate": 1.971865881095481e-05,
"loss": -0.047,
"step": 1220
},
{
"epoch": 0.62,
"grad_norm": 2.79782761936076e-05,
"learning_rate": 1.9702868364976068e-05,
"loss": -0.0117,
"step": 1240
},
{
"epoch": 0.63,
"grad_norm": 3.6909668210682867e-07,
"learning_rate": 1.968665348410211e-05,
"loss": 0.0,
"step": 1260
},
{
"epoch": 0.64,
"grad_norm": 2.4639082766952924e-05,
"learning_rate": 1.9670014877624353e-05,
"loss": -0.014,
"step": 1280
},
{
"epoch": 0.65,
"grad_norm": 1.4362817637447733e-06,
"learning_rate": 1.9652953273369344e-05,
"loss": 0.0,
"step": 1300
},
{
"epoch": 0.66,
"grad_norm": 2.5066849048016593e-05,
"learning_rate": 1.9635469417666922e-05,
"loss": -0.0035,
"step": 1320
},
{
"epoch": 0.67,
"grad_norm": 0.0002359877253184095,
"learning_rate": 1.9617564075317585e-05,
"loss": 0.0,
"step": 1340
},
{
"epoch": 0.68,
"grad_norm": 1.3228980151325231e-06,
"learning_rate": 1.9599238029559005e-05,
"loss": 0.0,
"step": 1360
},
{
"epoch": 0.69,
"grad_norm": 8.186302125068323e-08,
"learning_rate": 1.958049208203179e-05,
"loss": 0.0,
"step": 1380
},
{
"epoch": 0.7,
"grad_norm": 7.560096992875742e-10,
"learning_rate": 1.9561327052744422e-05,
"loss": 0.0,
"step": 1400
},
{
"epoch": 0.71,
"grad_norm": 5.596998420287491e-10,
"learning_rate": 1.9541743780037358e-05,
"loss": 0.0,
"step": 1420
},
{
"epoch": 0.72,
"grad_norm": 1.471132327068858e-09,
"learning_rate": 1.9521743120546396e-05,
"loss": 0.0,
"step": 1440
},
{
"epoch": 0.73,
"grad_norm": 4.28950278075213e-11,
"learning_rate": 1.950132594916517e-05,
"loss": 0.0,
"step": 1460
},
{
"epoch": 0.74,
"grad_norm": 0.0,
"learning_rate": 1.9480493159006897e-05,
"loss": 0.0,
"step": 1480
},
{
"epoch": 0.75,
"grad_norm": 0.0,
"learning_rate": 1.9459245661365312e-05,
"loss": 0.0,
"step": 1500
},
{
"epoch": 0.76,
"grad_norm": 0.0,
"learning_rate": 1.9437584385674793e-05,
"loss": 0.0,
"step": 1520
},
{
"epoch": 0.77,
"grad_norm": 0.0,
"learning_rate": 1.941551027946971e-05,
"loss": 0.0,
"step": 1540
},
{
"epoch": 0.78,
"grad_norm": 0.0,
"learning_rate": 1.9393024308342982e-05,
"loss": 0.0,
"step": 1560
},
{
"epoch": 0.79,
"grad_norm": 0.0,
"learning_rate": 1.9370127455903825e-05,
"loss": 0.0,
"step": 1580
},
{
"epoch": 0.8,
"grad_norm": 0.0,
"learning_rate": 1.9346820723734746e-05,
"loss": 0.0,
"step": 1600
},
{
"epoch": 0.81,
"grad_norm": 0.0,
"learning_rate": 1.932310513134771e-05,
"loss": 0.0,
"step": 1620
},
{
"epoch": 0.82,
"grad_norm": 0.0,
"learning_rate": 1.9298981716139554e-05,
"loss": 0.0,
"step": 1640
},
{
"epoch": 0.83,
"grad_norm": 0.0,
"learning_rate": 1.9274451533346617e-05,
"loss": 0.0,
"step": 1660
},
{
"epoch": 0.84,
"grad_norm": 0.0,
"learning_rate": 1.9249515655998547e-05,
"loss": 0.0,
"step": 1680
},
{
"epoch": 0.85,
"grad_norm": 0.0,
"learning_rate": 1.9224175174871415e-05,
"loss": 0.0,
"step": 1700
},
{
"epoch": 0.86,
"grad_norm": 0.0,
"learning_rate": 1.9198431198439945e-05,
"loss": 0.0,
"step": 1720
},
{
"epoch": 0.87,
"grad_norm": 0.0,
"learning_rate": 1.9172284852829074e-05,
"loss": 0.0,
"step": 1740
},
{
"epoch": 0.88,
"grad_norm": 0.0,
"learning_rate": 1.914573728176466e-05,
"loss": 0.0,
"step": 1760
},
{
"epoch": 0.89,
"grad_norm": 0.0,
"learning_rate": 1.9118789646523458e-05,
"loss": 0.0,
"step": 1780
},
{
"epoch": 0.9,
"grad_norm": 0.0,
"learning_rate": 1.9091443125882336e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 0.91,
"grad_norm": 0.0,
"learning_rate": 1.9063698916066698e-05,
"loss": 0.0,
"step": 1820
},
{
"epoch": 0.92,
"grad_norm": 18.19425392150879,
"learning_rate": 1.9035558230698154e-05,
"loss": -0.021,
"step": 1840
},
{
"epoch": 0.93,
"grad_norm": 0.0011678176233544946,
"learning_rate": 1.9007022300741457e-05,
"loss": -0.0368,
"step": 1860
},
{
"epoch": 0.94,
"grad_norm": 1.425844669342041,
"learning_rate": 1.897809237445061e-05,
"loss": -0.0002,
"step": 1880
},
{
"epoch": 0.95,
"grad_norm": 0.12504132091999054,
"learning_rate": 1.894876971731433e-05,
"loss": -0.0015,
"step": 1900
},
{
"epoch": 0.96,
"grad_norm": 3.9395573026013153e-07,
"learning_rate": 1.8919055612000613e-05,
"loss": 0.0,
"step": 1920
},
{
"epoch": 0.97,
"grad_norm": 1.2863043821198517e-06,
"learning_rate": 1.8888951358300694e-05,
"loss": 0.0,
"step": 1940
},
{
"epoch": 0.98,
"grad_norm": 3.974708306486718e-06,
"learning_rate": 1.8858458273072158e-05,
"loss": -0.0137,
"step": 1960
},
{
"epoch": 0.99,
"grad_norm": 3.261742676841095e-05,
"learning_rate": 1.882757769018133e-05,
"loss": 0.0,
"step": 1980
},
{
"epoch": 1.0,
"grad_norm": 2.98726092751167e-08,
"learning_rate": 1.879631096044495e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 1.01,
"grad_norm": 3.763807967516186e-07,
"learning_rate": 1.876465945157106e-05,
"loss": 0.0,
"step": 2020
},
{
"epoch": 1.02,
"grad_norm": 2.658192777005297e-09,
"learning_rate": 1.8732624548099204e-05,
"loss": 0.0,
"step": 2040
},
{
"epoch": 1.03,
"grad_norm": 1.2280678696185987e-09,
"learning_rate": 1.8700207651339832e-05,
"loss": 0.0,
"step": 2060
},
{
"epoch": 1.04,
"grad_norm": 2.058490267131674e-09,
"learning_rate": 1.8667410179313027e-05,
"loss": 0.0,
"step": 2080
},
{
"epoch": 1.05,
"grad_norm": 1.4351135731172082e-11,
"learning_rate": 1.863423356668646e-05,
"loss": 0.0,
"step": 2100
},
{
"epoch": 1.06,
"grad_norm": 0.0,
"learning_rate": 1.8600679264712652e-05,
"loss": 0.0,
"step": 2120
},
{
"epoch": 1.07,
"grad_norm": 0.0,
"learning_rate": 1.8566748741165465e-05,
"loss": 0.0,
"step": 2140
},
{
"epoch": 1.08,
"grad_norm": 0.0,
"learning_rate": 1.8532443480275924e-05,
"loss": 0.0,
"step": 2160
},
{
"epoch": 1.09,
"grad_norm": 0.0,
"learning_rate": 1.8497764982667275e-05,
"loss": 0.0,
"step": 2180
},
{
"epoch": 1.1,
"grad_norm": 0.0,
"learning_rate": 1.8462714765289342e-05,
"loss": 0.0,
"step": 2200
},
{
"epoch": 1.11,
"grad_norm": 0.0,
"learning_rate": 1.8427294361352176e-05,
"loss": 0.0,
"step": 2220
},
{
"epoch": 1.12,
"grad_norm": 0.0,
"learning_rate": 1.8391505320259e-05,
"loss": 0.0,
"step": 2240
},
{
"epoch": 1.13,
"grad_norm": 0.0,
"learning_rate": 1.8355349207538408e-05,
"loss": 0.0,
"step": 2260
},
{
"epoch": 1.1400000000000001,
"grad_norm": 0.0,
"learning_rate": 1.8318827604775902e-05,
"loss": 0.0,
"step": 2280
},
{
"epoch": 1.15,
"grad_norm": 0.0,
"learning_rate": 1.8281942109544698e-05,
"loss": 0.0,
"step": 2300
},
{
"epoch": 1.16,
"grad_norm": 0.0,
"learning_rate": 1.8244694335335853e-05,
"loss": 0.0,
"step": 2320
},
{
"epoch": 1.17,
"grad_norm": 0.0,
"learning_rate": 1.820708591148767e-05,
"loss": 0.0,
"step": 2340
},
{
"epoch": 1.18,
"grad_norm": 0.0,
"learning_rate": 1.816911848311445e-05,
"loss": 0.0,
"step": 2360
},
{
"epoch": 1.19,
"grad_norm": 0.0,
"learning_rate": 1.8130793711034506e-05,
"loss": 0.0,
"step": 2380
},
{
"epoch": 1.2,
"grad_norm": 0.0,
"learning_rate": 1.8092113271697522e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 1.21,
"grad_norm": 0.0,
"learning_rate": 1.8053078857111218e-05,
"loss": 0.0,
"step": 2420
},
{
"epoch": 1.22,
"grad_norm": 0.0,
"learning_rate": 1.801369217476735e-05,
"loss": 0.0,
"step": 2440
},
{
"epoch": 1.23,
"grad_norm": 0.0,
"learning_rate": 1.7973954947566995e-05,
"loss": 0.0,
"step": 2460
},
{
"epoch": 1.24,
"grad_norm": 0.0,
"learning_rate": 1.7933868913745205e-05,
"loss": 0.0,
"step": 2480
},
{
"epoch": 1.25,
"grad_norm": 0.0,
"learning_rate": 1.789343582679495e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 1.26,
"grad_norm": 0.0,
"learning_rate": 1.785265745539045e-05,
"loss": 0.0,
"step": 2520
},
{
"epoch": 1.27,
"grad_norm": 0.0,
"learning_rate": 1.781153558330977e-05,
"loss": 0.0,
"step": 2540
},
{
"epoch": 1.28,
"grad_norm": 0.0,
"learning_rate": 1.7770072009356805e-05,
"loss": 0.0,
"step": 2560
},
{
"epoch": 1.29,
"grad_norm": 0.0,
"learning_rate": 1.772826854728261e-05,
"loss": 0.0,
"step": 2580
},
{
"epoch": 1.3,
"grad_norm": 0.0,
"learning_rate": 1.7686127025706038e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 1.31,
"grad_norm": 0.0,
"learning_rate": 1.7643649288033766e-05,
"loss": 0.0,
"step": 2620
},
{
"epoch": 1.32,
"grad_norm": 0.0,
"learning_rate": 1.760083719237964e-05,
"loss": 0.0,
"step": 2640
},
{
"epoch": 1.33,
"grad_norm": 0.0,
"learning_rate": 1.755769261148343e-05,
"loss": 0.0,
"step": 2660
},
{
"epoch": 1.34,
"grad_norm": 0.0,
"learning_rate": 1.7514217432628856e-05,
"loss": 0.0,
"step": 2680
},
{
"epoch": 1.35,
"grad_norm": 0.0,
"learning_rate": 1.7470413557561098e-05,
"loss": 0.0,
"step": 2700
},
{
"epoch": 1.3599999999999999,
"grad_norm": 6.09237849857891e-06,
"learning_rate": 1.7426282902403545e-05,
"loss": -0.0595,
"step": 2720
},
{
"epoch": 1.37,
"grad_norm": 6.212492280610604e-06,
"learning_rate": 1.7381827397574017e-05,
"loss": 0.0,
"step": 2740
},
{
"epoch": 1.38,
"grad_norm": 1.1541774256329518e-06,
"learning_rate": 1.733704898770032e-05,
"loss": 0.0,
"step": 2760
},
{
"epoch": 1.3900000000000001,
"grad_norm": 6.421708462767128e-07,
"learning_rate": 1.7291949631535155e-05,
"loss": 0.0,
"step": 2780
},
{
"epoch": 1.4,
"grad_norm": 3.152386085503167e-08,
"learning_rate": 1.7246531301870467e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 1.41,
"grad_norm": 8.747147717258485e-08,
"learning_rate": 1.720079598545113e-05,
"loss": 0.0,
"step": 2820
},
{
"epoch": 1.42,
"grad_norm": 3.257514258336869e-09,
"learning_rate": 1.7154745682888045e-05,
"loss": 0.0,
"step": 2840
},
{
"epoch": 1.43,
"grad_norm": 0.06368578225374222,
"learning_rate": 1.7108382408570626e-05,
"loss": -0.0118,
"step": 2860
},
{
"epoch": 1.44,
"grad_norm": 38.9387321472168,
"learning_rate": 1.7061708190578695e-05,
"loss": -0.01,
"step": 2880
},
{
"epoch": 1.45,
"grad_norm": 3.736419103006483e-07,
"learning_rate": 1.7014725070593742e-05,
"loss": 0.0,
"step": 2900
},
{
"epoch": 1.46,
"grad_norm": 7.977614018273016e-07,
"learning_rate": 1.6967435103809646e-05,
"loss": -0.0327,
"step": 2920
},
{
"epoch": 1.47,
"grad_norm": 1.0230847919956432e-06,
"learning_rate": 1.691984035884275e-05,
"loss": 0.0,
"step": 2940
},
{
"epoch": 1.48,
"grad_norm": 18.049253463745117,
"learning_rate": 1.6871942917641385e-05,
"loss": -0.0016,
"step": 2960
},
{
"epoch": 1.49,
"grad_norm": 0.00011181381705682725,
"learning_rate": 1.6823744875394788e-05,
"loss": -0.0062,
"step": 2980
},
{
"epoch": 1.5,
"grad_norm": 85.54637145996094,
"learning_rate": 1.677524834044148e-05,
"loss": -0.051,
"step": 3000
},
{
"epoch": 1.51,
"grad_norm": 2.2363690277416026e-07,
"learning_rate": 1.6726455434177e-05,
"loss": 0.0,
"step": 3020
},
{
"epoch": 1.52,
"grad_norm": 5.164109850852583e-08,
"learning_rate": 1.6677368290961133e-05,
"loss": 0.0,
"step": 3040
},
{
"epoch": 1.53,
"grad_norm": 1.7374658156654732e-08,
"learning_rate": 1.6627989058024546e-05,
"loss": 0.0,
"step": 3060
},
{
"epoch": 1.54,
"grad_norm": 1.4617263996430374e-11,
"learning_rate": 1.6578319895374854e-05,
"loss": 0.0,
"step": 3080
},
{
"epoch": 1.55,
"grad_norm": 5.5272342258660956e-11,
"learning_rate": 1.652836297570214e-05,
"loss": 0.0,
"step": 3100
},
{
"epoch": 1.56,
"grad_norm": 1.90355758933336e-10,
"learning_rate": 1.64781204842839e-05,
"loss": 0.0,
"step": 3120
},
{
"epoch": 1.5699999999999998,
"grad_norm": 1.7849938541147914e-11,
"learning_rate": 1.6427594618889484e-05,
"loss": 0.0,
"step": 3140
},
{
"epoch": 1.58,
"grad_norm": 1.1557519264543403e-12,
"learning_rate": 1.6376787589683914e-05,
"loss": 0.0,
"step": 3160
},
{
"epoch": 1.5899999999999999,
"grad_norm": 0.0,
"learning_rate": 1.6325701619131246e-05,
"loss": 0.0,
"step": 3180
},
{
"epoch": 1.6,
"grad_norm": 0.0,
"learning_rate": 1.6274338941897325e-05,
"loss": 0.0,
"step": 3200
},
{
"epoch": 1.6099999999999999,
"grad_norm": 0.0,
"learning_rate": 1.6222701804752047e-05,
"loss": 0.0,
"step": 3220
},
{
"epoch": 1.62,
"grad_norm": 0.0,
"learning_rate": 1.6170792466471072e-05,
"loss": 0.0,
"step": 3240
},
{
"epoch": 1.63,
"grad_norm": 0.0,
"learning_rate": 1.6118613197737013e-05,
"loss": 0.0,
"step": 3260
},
{
"epoch": 1.6400000000000001,
"grad_norm": 0.0,
"learning_rate": 1.606616628104013e-05,
"loss": 0.0,
"step": 3280
},
{
"epoch": 1.65,
"grad_norm": 0.0,
"learning_rate": 1.6013454010578465e-05,
"loss": 0.0,
"step": 3300
},
{
"epoch": 1.6600000000000001,
"grad_norm": 0.0,
"learning_rate": 1.5960478692157483e-05,
"loss": 0.0,
"step": 3320
},
{
"epoch": 1.67,
"grad_norm": 0.0,
"learning_rate": 1.5907242643089234e-05,
"loss": 0.0,
"step": 3340
},
{
"epoch": 1.6800000000000002,
"grad_norm": 0.0,
"learning_rate": 1.5853748192090967e-05,
"loss": 0.0,
"step": 3360
},
{
"epoch": 1.69,
"grad_norm": 0.0,
"learning_rate": 1.5799997679183258e-05,
"loss": 0.0,
"step": 3380
},
{
"epoch": 1.7,
"grad_norm": 0.0,
"learning_rate": 1.5745993455587678e-05,
"loss": 0.0,
"step": 3400
},
{
"epoch": 1.71,
"grad_norm": 0.0,
"learning_rate": 1.5691737883623912e-05,
"loss": 0.0,
"step": 3420
},
{
"epoch": 1.72,
"grad_norm": 0.0,
"learning_rate": 1.563723333660644e-05,
"loss": 0.0,
"step": 3440
},
{
"epoch": 1.73,
"grad_norm": 0.0,
"learning_rate": 1.5582482198740726e-05,
"loss": 0.0,
"step": 3460
},
{
"epoch": 1.74,
"grad_norm": 0.0,
"learning_rate": 1.5527486865018904e-05,
"loss": 0.0,
"step": 3480
},
{
"epoch": 1.75,
"grad_norm": 0.0,
"learning_rate": 1.547224974111503e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 1.76,
"grad_norm": 0.0,
"learning_rate": 1.541677324327985e-05,
"loss": 0.0,
"step": 3520
},
{
"epoch": 1.77,
"grad_norm": 0.0,
"learning_rate": 1.5361059798235093e-05,
"loss": 0.0,
"step": 3540
},
{
"epoch": 1.78,
"grad_norm": 0.0,
"learning_rate": 1.5305111843067343e-05,
"loss": 0.0,
"step": 3560
},
{
"epoch": 1.79,
"grad_norm": 0.0,
"learning_rate": 1.5248931825121393e-05,
"loss": 0.0,
"step": 3580
},
{
"epoch": 1.8,
"grad_norm": 0.0,
"learning_rate": 1.5192522201893236e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 1.81,
"grad_norm": 0.0,
"learning_rate": 1.5135885440922522e-05,
"loss": 0.0,
"step": 3620
},
{
"epoch": 1.8199999999999998,
"grad_norm": 0.0,
"learning_rate": 1.5079024019684668e-05,
"loss": 0.0,
"step": 3640
},
{
"epoch": 1.83,
"grad_norm": 0.0,
"learning_rate": 1.502194042548243e-05,
"loss": 0.0,
"step": 3660
},
{
"epoch": 1.8399999999999999,
"grad_norm": 0.0,
"learning_rate": 1.4964637155337156e-05,
"loss": 0.0,
"step": 3680
},
{
"epoch": 1.85,
"grad_norm": 0.0,
"learning_rate": 1.4907116715879511e-05,
"loss": 0.0,
"step": 3700
},
{
"epoch": 1.8599999999999999,
"grad_norm": 0.0,
"learning_rate": 1.484938162323986e-05,
"loss": 0.0,
"step": 3720
},
{
"epoch": 1.87,
"grad_norm": 3.1729632610222325e-07,
"learning_rate": 1.4791434402938191e-05,
"loss": -0.004,
"step": 3740
},
{
"epoch": 1.88,
"grad_norm": 2.9630384688061895e-06,
"learning_rate": 1.4733277589773641e-05,
"loss": 0.0,
"step": 3760
},
{
"epoch": 1.8900000000000001,
"grad_norm": 6.645004759775475e-07,
"learning_rate": 1.4674913727713623e-05,
"loss": 0.0,
"step": 3780
},
{
"epoch": 1.9,
"grad_norm": 8.223436452681199e-06,
"learning_rate": 1.4616345369782534e-05,
"loss": -0.0073,
"step": 3800
},
{
"epoch": 1.9100000000000001,
"grad_norm": 6.144579742795031e-07,
"learning_rate": 1.4557575077950085e-05,
"loss": 0.0,
"step": 3820
},
{
"epoch": 1.92,
"grad_norm": 6.885377956677985e-07,
"learning_rate": 1.4498605423019234e-05,
"loss": 0.0,
"step": 3840
},
{
"epoch": 1.9300000000000002,
"grad_norm": 1.0231551716977094e-11,
"learning_rate": 1.4439438984513716e-05,
"loss": 0.0,
"step": 3860
},
{
"epoch": 1.94,
"grad_norm": 1.5923303257545096e-12,
"learning_rate": 1.438007835056523e-05,
"loss": 0.0,
"step": 3880
},
{
"epoch": 1.95,
"grad_norm": 3.1159091973265163e-10,
"learning_rate": 1.4320526117800201e-05,
"loss": 0.0,
"step": 3900
},
{
"epoch": 1.96,
"grad_norm": 1.0668846361916451e-10,
"learning_rate": 1.4260784891226217e-05,
"loss": 0.0,
"step": 3920
},
{
"epoch": 1.97,
"grad_norm": 7.081080860560363e-11,
"learning_rate": 1.4200857284118067e-05,
"loss": 0.0,
"step": 3940
},
{
"epoch": 1.98,
"grad_norm": 0.0,
"learning_rate": 1.4140745917903413e-05,
"loss": 0.0,
"step": 3960
},
{
"epoch": 1.99,
"grad_norm": 0.0,
"learning_rate": 1.4080453422048152e-05,
"loss": 0.0,
"step": 3980
},
{
"epoch": 2.0,
"grad_norm": 0.0,
"learning_rate": 1.401998243394138e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 2.01,
"grad_norm": 0.0,
"learning_rate": 1.3959335598780009e-05,
"loss": 0.0,
"step": 4020
},
{
"epoch": 2.02,
"grad_norm": 0.0,
"learning_rate": 1.3898515569453076e-05,
"loss": 0.0,
"step": 4040
},
{
"epoch": 2.03,
"grad_norm": 0.0,
"learning_rate": 1.3837525006425698e-05,
"loss": 0.0,
"step": 4060
},
{
"epoch": 2.04,
"grad_norm": 0.0,
"learning_rate": 1.3776366577622681e-05,
"loss": 0.0,
"step": 4080
},
{
"epoch": 2.05,
"grad_norm": 0.0,
"learning_rate": 1.3715042958311831e-05,
"loss": 0.0,
"step": 4100
},
{
"epoch": 2.06,
"grad_norm": 0.0,
"learning_rate": 1.365355683098691e-05,
"loss": 0.0,
"step": 4120
},
{
"epoch": 2.07,
"grad_norm": 0.0,
"learning_rate": 1.3591910885250318e-05,
"loss": 0.0,
"step": 4140
},
{
"epoch": 2.08,
"grad_norm": 0.0,
"learning_rate": 1.3530107817695433e-05,
"loss": 0.0,
"step": 4160
},
{
"epoch": 2.09,
"grad_norm": 0.0,
"learning_rate": 1.3468150331788634e-05,
"loss": 0.0,
"step": 4180
},
{
"epoch": 2.1,
"grad_norm": 0.0,
"learning_rate": 1.3406041137751076e-05,
"loss": 0.0,
"step": 4200
},
{
"epoch": 2.11,
"grad_norm": 0.0,
"learning_rate": 1.3343782952440109e-05,
"loss": 0.0,
"step": 4220
},
{
"epoch": 2.12,
"grad_norm": 0.0,
"learning_rate": 1.3281378499230448e-05,
"loss": 0.0,
"step": 4240
},
{
"epoch": 2.13,
"grad_norm": 0.0,
"learning_rate": 1.3218830507895035e-05,
"loss": 0.0,
"step": 4260
},
{
"epoch": 2.14,
"grad_norm": 0.0,
"learning_rate": 1.3156141714485647e-05,
"loss": 0.0,
"step": 4280
},
{
"epoch": 2.15,
"grad_norm": 0.0,
"learning_rate": 1.3093314861213187e-05,
"loss": 0.0,
"step": 4300
},
{
"epoch": 2.16,
"grad_norm": 0.0,
"learning_rate": 1.3030352696327741e-05,
"loss": 0.0,
"step": 4320
},
{
"epoch": 2.17,
"grad_norm": 0.0,
"learning_rate": 1.296725797399838e-05,
"loss": 0.0,
"step": 4340
},
{
"epoch": 2.18,
"grad_norm": 0.0,
"learning_rate": 1.2904033454192653e-05,
"loss": 0.0,
"step": 4360
},
{
"epoch": 2.19,
"grad_norm": 0.0,
"learning_rate": 1.2840681902555876e-05,
"loss": 0.0,
"step": 4380
},
{
"epoch": 2.2,
"grad_norm": 0.0,
"learning_rate": 1.2777206090290148e-05,
"loss": 0.0,
"step": 4400
},
{
"epoch": 2.21,
"grad_norm": 0.0,
"learning_rate": 1.271360879403313e-05,
"loss": 0.0,
"step": 4420
},
{
"epoch": 2.22,
"grad_norm": 0.0,
"learning_rate": 1.2649892795736588e-05,
"loss": 0.0,
"step": 4440
},
{
"epoch": 2.23,
"grad_norm": 0.0,
"learning_rate": 1.2586060882544695e-05,
"loss": 0.0,
"step": 4460
},
{
"epoch": 2.24,
"grad_norm": 0.0,
"learning_rate": 1.2522115846672129e-05,
"loss": 0.0,
"step": 4480
},
{
"epoch": 2.25,
"grad_norm": 0.0,
"learning_rate": 1.2458060485281903e-05,
"loss": 0.0,
"step": 4500
},
{
"epoch": 2.26,
"grad_norm": 0.0,
"learning_rate": 1.2393897600363045e-05,
"loss": 0.0,
"step": 4520
},
{
"epoch": 2.27,
"grad_norm": 0.0,
"learning_rate": 1.2329629998607991e-05,
"loss": 0.0,
"step": 4540
},
{
"epoch": 2.2800000000000002,
"grad_norm": 0.0,
"learning_rate": 1.2265260491289843e-05,
"loss": 0.0,
"step": 4560
},
{
"epoch": 2.29,
"grad_norm": 0.0,
"learning_rate": 1.220079189413938e-05,
"loss": 0.0,
"step": 4580
},
{
"epoch": 2.3,
"grad_norm": 0.0,
"learning_rate": 1.2136227027221887e-05,
"loss": 0.0,
"step": 4600
},
{
"epoch": 2.31,
"grad_norm": 0.0,
"learning_rate": 1.2071568714813814e-05,
"loss": 0.0,
"step": 4620
},
{
"epoch": 2.32,
"grad_norm": 0.0,
"learning_rate": 1.2006819785279197e-05,
"loss": 0.0,
"step": 4640
},
{
"epoch": 2.33,
"grad_norm": 0.0,
"learning_rate": 1.1941983070945984e-05,
"loss": 0.0,
"step": 4660
},
{
"epoch": 2.34,
"grad_norm": 0.0,
"learning_rate": 1.187706140798209e-05,
"loss": 0.0,
"step": 4680
},
{
"epoch": 2.35,
"grad_norm": 0.0,
"learning_rate": 1.1812057636271374e-05,
"loss": 0.0,
"step": 4700
},
{
"epoch": 2.36,
"grad_norm": 0.0,
"learning_rate": 1.1746974599289398e-05,
"loss": 0.0,
"step": 4720
},
{
"epoch": 2.37,
"grad_norm": 0.0,
"learning_rate": 1.1681815143979036e-05,
"loss": 0.0,
"step": 4740
},
{
"epoch": 2.38,
"grad_norm": 0.0,
"learning_rate": 1.1616582120625949e-05,
"loss": 0.0,
"step": 4760
},
{
"epoch": 2.39,
"grad_norm": 0.0,
"learning_rate": 1.1551278382733908e-05,
"loss": 0.0,
"step": 4780
},
{
"epoch": 2.4,
"grad_norm": 0.0,
"learning_rate": 1.148590678689996e-05,
"loss": 0.0,
"step": 4800
},
{
"epoch": 2.41,
"grad_norm": 0.0,
"learning_rate": 1.1420470192689482e-05,
"loss": 0.0,
"step": 4820
},
{
"epoch": 2.42,
"grad_norm": 0.0,
"learning_rate": 1.135497146251109e-05,
"loss": 0.0,
"step": 4840
},
{
"epoch": 2.43,
"grad_norm": 0.0,
"learning_rate": 1.1289413461491432e-05,
"loss": 0.0,
"step": 4860
},
{
"epoch": 2.44,
"grad_norm": 0.0,
"learning_rate": 1.1223799057349846e-05,
"loss": 0.0,
"step": 4880
},
{
"epoch": 2.45,
"grad_norm": 0.0,
"learning_rate": 1.1158131120272935e-05,
"loss": 0.0,
"step": 4900
},
{
"epoch": 2.46,
"grad_norm": 0.0,
"learning_rate": 1.1092412522788996e-05,
"loss": 0.0,
"step": 4920
},
{
"epoch": 2.4699999999999998,
"grad_norm": 0.0,
"learning_rate": 1.1026646139642385e-05,
"loss": 0.0,
"step": 4940
},
{
"epoch": 2.48,
"grad_norm": 0.0,
"learning_rate": 1.0960834847667753e-05,
"loss": 0.0,
"step": 4960
},
{
"epoch": 2.49,
"grad_norm": 0.0,
"learning_rate": 1.089498152566421e-05,
"loss": 0.0,
"step": 4980
},
{
"epoch": 2.5,
"grad_norm": 0.0,
"learning_rate": 1.0829089054269397e-05,
"loss": 0.0,
"step": 5000
},
{
"epoch": 2.51,
"grad_norm": 0.0,
"learning_rate": 1.0763160315833478e-05,
"loss": 0.0,
"step": 5020
},
{
"epoch": 2.52,
"grad_norm": 0.0,
"learning_rate": 1.0697198194293044e-05,
"loss": 0.0,
"step": 5040
},
{
"epoch": 2.5300000000000002,
"grad_norm": 0.0,
"learning_rate": 1.0631205575044982e-05,
"loss": 0.0,
"step": 5060
},
{
"epoch": 2.54,
"grad_norm": 0.0,
"learning_rate": 1.0565185344820248e-05,
"loss": 0.0,
"step": 5080
},
{
"epoch": 2.55,
"grad_norm": 0.0,
"learning_rate": 1.049914039155758e-05,
"loss": 0.0,
"step": 5100
},
{
"epoch": 2.56,
"grad_norm": 0.0,
"learning_rate": 1.0433073604277199e-05,
"loss": 0.0,
"step": 5120
},
{
"epoch": 2.57,
"grad_norm": 0.0,
"learning_rate": 1.0366987872954404e-05,
"loss": 0.0,
"step": 5140
},
{
"epoch": 2.58,
"grad_norm": 0.0,
"learning_rate": 1.030088608839317e-05,
"loss": 0.0,
"step": 5160
},
{
"epoch": 2.59,
"grad_norm": 0.0,
"learning_rate": 1.0234771142099689e-05,
"loss": 0.0,
"step": 5180
},
{
"epoch": 2.6,
"grad_norm": 0.0,
"learning_rate": 1.0168645926155902e-05,
"loss": 0.0,
"step": 5200
},
{
"epoch": 2.61,
"grad_norm": 0.0,
"learning_rate": 1.010251333309297e-05,
"loss": 0.0,
"step": 5220
},
{
"epoch": 2.62,
"grad_norm": 0.0,
"learning_rate": 1.003637625576475e-05,
"loss": 0.0,
"step": 5240
},
{
"epoch": 2.63,
"grad_norm": 0.0,
"learning_rate": 9.97023758722126e-06,
"loss": 0.0,
"step": 5260
},
{
"epoch": 2.64,
"grad_norm": 0.0,
"learning_rate": 9.90410022058212e-06,
"loss": 0.0,
"step": 5280
},
{
"epoch": 2.65,
"grad_norm": 0.0,
"learning_rate": 9.837967048910006e-06,
"loss": 0.0,
"step": 5300
},
{
"epoch": 2.66,
"grad_norm": 0.0,
"learning_rate": 9.771840965084088e-06,
"loss": 0.0,
"step": 5320
},
{
"epoch": 2.67,
"grad_norm": 0.0,
"learning_rate": 9.705724861673488e-06,
"loss": 0.0,
"step": 5340
},
{
"epoch": 2.68,
"grad_norm": 0.0,
"learning_rate": 9.639621630810758e-06,
"loss": 0.0,
"step": 5360
},
{
"epoch": 2.69,
"grad_norm": 0.0,
"learning_rate": 9.573534164065363e-06,
"loss": 0.0,
"step": 5380
},
{
"epoch": 2.7,
"grad_norm": 0.0,
"learning_rate": 9.507465352317187e-06,
"loss": 0.0,
"step": 5400
},
{
"epoch": 2.71,
"grad_norm": 0.0,
"learning_rate": 9.441418085630088e-06,
"loss": 0.0,
"step": 5420
},
{
"epoch": 2.7199999999999998,
"grad_norm": 0.0,
"learning_rate": 9.37539525312548e-06,
"loss": 0.0,
"step": 5440
},
{
"epoch": 2.73,
"grad_norm": 0.0,
"learning_rate": 9.309399742855943e-06,
"loss": 0.0,
"step": 5460
},
{
"epoch": 2.74,
"grad_norm": 0.0,
"learning_rate": 9.243434441678884e-06,
"loss": 0.0,
"step": 5480
},
{
"epoch": 2.75,
"grad_norm": 0.0,
"learning_rate": 9.177502235130283e-06,
"loss": 0.0,
"step": 5500
},
{
"epoch": 2.76,
"grad_norm": 0.0,
"learning_rate": 9.11160600729844e-06,
"loss": 0.0,
"step": 5520
},
{
"epoch": 2.77,
"grad_norm": 0.0,
"learning_rate": 9.045748640697832e-06,
"loss": 0.0,
"step": 5540
},
{
"epoch": 2.7800000000000002,
"grad_norm": 0.0,
"learning_rate": 8.979933016143022e-06,
"loss": 0.0,
"step": 5560
},
{
"epoch": 2.79,
"grad_norm": 0.0,
"learning_rate": 8.91416201262265e-06,
"loss": 0.0,
"step": 5580
},
{
"epoch": 2.8,
"grad_norm": 0.0,
"learning_rate": 8.848438507173475e-06,
"loss": 0.0,
"step": 5600
},
{
"epoch": 2.81,
"grad_norm": 0.0,
"learning_rate": 8.782765374754536e-06,
"loss": 0.0,
"step": 5620
},
{
"epoch": 2.82,
"grad_norm": 0.0,
"learning_rate": 8.717145488121397e-06,
"loss": 0.0,
"step": 5640
},
{
"epoch": 2.83,
"grad_norm": 0.0,
"learning_rate": 8.651581717700483e-06,
"loss": 0.0,
"step": 5660
},
{
"epoch": 2.84,
"grad_norm": 0.0,
"learning_rate": 8.586076931463504e-06,
"loss": 0.0,
"step": 5680
},
{
"epoch": 2.85,
"grad_norm": 0.0,
"learning_rate": 8.520633994802014e-06,
"loss": 0.0,
"step": 5700
},
{
"epoch": 2.86,
"grad_norm": 0.0,
"learning_rate": 8.45525577040208e-06,
"loss": 0.0,
"step": 5720
},
{
"epoch": 2.87,
"grad_norm": 0.0,
"learning_rate": 8.389945118119023e-06,
"loss": 0.0,
"step": 5740
},
{
"epoch": 2.88,
"grad_norm": 0.0,
"learning_rate": 8.324704894852354e-06,
"loss": 0.0,
"step": 5760
},
{
"epoch": 2.89,
"grad_norm": 0.0,
"learning_rate": 8.25953795442079e-06,
"loss": 0.0,
"step": 5780
},
{
"epoch": 2.9,
"grad_norm": 0.0,
"learning_rate": 8.19444714743741e-06,
"loss": 0.0,
"step": 5800
},
{
"epoch": 2.91,
"grad_norm": 0.0,
"learning_rate": 8.129435321184985e-06,
"loss": 0.0,
"step": 5820
},
{
"epoch": 2.92,
"grad_norm": 0.0,
"learning_rate": 8.064505319491398e-06,
"loss": 0.0,
"step": 5840
},
{
"epoch": 2.93,
"grad_norm": 0.0,
"learning_rate": 7.999659982605272e-06,
"loss": 0.0,
"step": 5860
},
{
"epoch": 2.94,
"grad_norm": 0.0,
"learning_rate": 7.934902147071708e-06,
"loss": 0.0,
"step": 5880
},
{
"epoch": 2.95,
"grad_norm": 0.0,
"learning_rate": 7.870234645608222e-06,
"loss": 0.0,
"step": 5900
},
{
"epoch": 2.96,
"grad_norm": 0.0,
"learning_rate": 7.80566030698082e-06,
"loss": 0.0,
"step": 5920
},
{
"epoch": 2.9699999999999998,
"grad_norm": 0.0,
"learning_rate": 7.741181955880263e-06,
"loss": 0.0,
"step": 5940
},
{
"epoch": 2.98,
"grad_norm": 0.0,
"learning_rate": 7.676802412798515e-06,
"loss": 0.0,
"step": 5960
},
{
"epoch": 2.99,
"grad_norm": 0.0,
"learning_rate": 7.6125244939053454e-06,
"loss": 0.0,
"step": 5980
},
{
"epoch": 3.0,
"grad_norm": 0.0,
"learning_rate": 7.5483510109251586e-06,
"loss": 0.0,
"step": 6000
},
{
"epoch": 3.01,
"grad_norm": 0.0,
"learning_rate": 7.4842847710139985e-06,
"loss": 0.0,
"step": 6020
},
{
"epoch": 3.02,
"grad_norm": 0.0,
"learning_rate": 7.420328576636742e-06,
"loss": 0.0,
"step": 6040
},
{
"epoch": 3.03,
"grad_norm": 0.0,
"learning_rate": 7.356485225444518e-06,
"loss": 0.0,
"step": 6060
},
{
"epoch": 3.04,
"grad_norm": 0.0,
"learning_rate": 7.292757510152333e-06,
"loss": 0.0,
"step": 6080
},
{
"epoch": 3.05,
"grad_norm": 0.0,
"learning_rate": 7.229148218416905e-06,
"loss": 0.0,
"step": 6100
},
{
"epoch": 3.06,
"grad_norm": 0.0,
"learning_rate": 7.16566013271472e-06,
"loss": 0.0,
"step": 6120
},
{
"epoch": 3.07,
"grad_norm": 0.0,
"learning_rate": 7.1022960302203115e-06,
"loss": 0.0,
"step": 6140
},
{
"epoch": 3.08,
"grad_norm": 0.0,
"learning_rate": 7.039058682684805e-06,
"loss": 0.0,
"step": 6160
},
{
"epoch": 3.09,
"grad_norm": 0.0,
"learning_rate": 6.975950856314636e-06,
"loss": 0.0,
"step": 6180
},
{
"epoch": 3.1,
"grad_norm": 0.0,
"learning_rate": 6.9129753116505734e-06,
"loss": 0.0,
"step": 6200
},
{
"epoch": 3.11,
"grad_norm": 0.0,
"learning_rate": 6.850134803446955e-06,
"loss": 0.0,
"step": 6220
},
{
"epoch": 3.12,
"grad_norm": 0.0,
"learning_rate": 6.7874320805511795e-06,
"loss": 0.0,
"step": 6240
},
{
"epoch": 3.13,
"grad_norm": 0.0,
"learning_rate": 6.724869885783477e-06,
"loss": 0.0,
"step": 6260
},
{
"epoch": 3.14,
"grad_norm": 0.0,
"learning_rate": 6.662450955816917e-06,
"loss": 0.0,
"step": 6280
},
{
"epoch": 3.15,
"grad_norm": 0.0,
"learning_rate": 6.600178021057712e-06,
"loss": 0.0,
"step": 6300
},
{
"epoch": 3.16,
"grad_norm": 0.0,
"learning_rate": 6.538053805525764e-06,
"loss": 0.0,
"step": 6320
},
{
"epoch": 3.17,
"grad_norm": 0.0,
"learning_rate": 6.4760810267355136e-06,
"loss": 0.0,
"step": 6340
},
{
"epoch": 3.18,
"grad_norm": 0.0,
"learning_rate": 6.4142623955770656e-06,
"loss": 0.0,
"step": 6360
},
{
"epoch": 3.19,
"grad_norm": 0.0,
"learning_rate": 6.352600616197615e-06,
"loss": 0.0,
"step": 6380
},
{
"epoch": 3.2,
"grad_norm": 0.0,
"learning_rate": 6.291098385883146e-06,
"loss": 0.0,
"step": 6400
},
{
"epoch": 3.21,
"grad_norm": 0.0,
"learning_rate": 6.22975839494045e-06,
"loss": 0.0,
"step": 6420
},
{
"epoch": 3.22,
"grad_norm": 0.0,
"learning_rate": 6.168583326579456e-06,
"loss": 0.0,
"step": 6440
},
{
"epoch": 3.23,
"grad_norm": 0.0,
"learning_rate": 6.1075758567958225e-06,
"loss": 0.0,
"step": 6460
},
{
"epoch": 3.24,
"grad_norm": 0.0,
"learning_rate": 6.046738654253918e-06,
"loss": 0.0,
"step": 6480
},
{
"epoch": 3.25,
"grad_norm": 0.0,
"learning_rate": 5.986074380170068e-06,
"loss": 0.0,
"step": 6500
},
{
"epoch": 3.26,
"grad_norm": 0.0,
"learning_rate": 5.925585688196145e-06,
"loss": 0.0,
"step": 6520
},
{
"epoch": 3.27,
"grad_norm": 0.0,
"learning_rate": 5.865275224303491e-06,
"loss": 0.0,
"step": 6540
},
{
"epoch": 3.2800000000000002,
"grad_norm": 0.0,
"learning_rate": 5.805145626667175e-06,
"loss": 0.0,
"step": 6560
},
{
"epoch": 3.29,
"grad_norm": 0.0,
"learning_rate": 5.745199525550596e-06,
"loss": 0.0,
"step": 6580
},
{
"epoch": 3.3,
"grad_norm": 0.0,
"learning_rate": 5.685439543190409e-06,
"loss": 0.0,
"step": 6600
},
{
"epoch": 3.31,
"grad_norm": 0.0,
"learning_rate": 5.6258682936818444e-06,
"loss": 0.0,
"step": 6620
},
{
"epoch": 3.32,
"grad_norm": 0.0,
"learning_rate": 5.566488382864335e-06,
"loss": 0.0,
"step": 6640
},
{
"epoch": 3.33,
"grad_norm": 0.0,
"learning_rate": 5.507302408207542e-06,
"loss": 0.0,
"step": 6660
},
{
"epoch": 3.34,
"grad_norm": 0.0,
"learning_rate": 5.448312958697739e-06,
"loss": 0.0,
"step": 6680
},
{
"epoch": 3.35,
"grad_norm": 0.0,
"learning_rate": 5.389522614724536e-06,
"loss": 0.0,
"step": 6700
},
{
"epoch": 3.36,
"grad_norm": 0.0,
"learning_rate": 5.330933947968049e-06,
"loss": 0.0,
"step": 6720
},
{
"epoch": 3.37,
"grad_norm": 0.0,
"learning_rate": 5.27254952128635e-06,
"loss": 0.0,
"step": 6740
},
{
"epoch": 3.38,
"grad_norm": 0.0,
"learning_rate": 5.2143718886034086e-06,
"loss": 0.0,
"step": 6760
},
{
"epoch": 3.39,
"grad_norm": 0.0,
"learning_rate": 5.1564035947973456e-06,
"loss": 0.0,
"step": 6780
},
{
"epoch": 3.4,
"grad_norm": 0.0,
"learning_rate": 5.098647175589118e-06,
"loss": 0.0,
"step": 6800
},
{
"epoch": 3.41,
"grad_norm": 0.0,
"learning_rate": 5.0411051574316165e-06,
"loss": 0.0,
"step": 6820
},
{
"epoch": 3.42,
"grad_norm": 0.0,
"learning_rate": 4.983780057399111e-06,
"loss": 0.0,
"step": 6840
},
{
"epoch": 3.43,
"grad_norm": 0.0,
"learning_rate": 4.926674383077203e-06,
"loss": 0.0,
"step": 6860
},
{
"epoch": 3.44,
"grad_norm": 0.0,
"learning_rate": 4.869790632453076e-06,
"loss": 0.0,
"step": 6880
},
{
"epoch": 3.45,
"grad_norm": 0.0,
"learning_rate": 4.813131293806254e-06,
"loss": 0.0,
"step": 6900
},
{
"epoch": 3.46,
"grad_norm": 0.0,
"learning_rate": 4.7566988455997684e-06,
"loss": 0.0,
"step": 6920
},
{
"epoch": 3.4699999999999998,
"grad_norm": 0.0,
"learning_rate": 4.700495756371713e-06,
"loss": 0.0,
"step": 6940
},
{
"epoch": 3.48,
"grad_norm": 0.0,
"learning_rate": 4.644524484627292e-06,
"loss": 0.0,
"step": 6960
},
{
"epoch": 3.49,
"grad_norm": 0.0,
"learning_rate": 4.588787478731242e-06,
"loss": 0.0,
"step": 6980
},
{
"epoch": 3.5,
"grad_norm": 0.0,
"learning_rate": 4.533287176800773e-06,
"loss": 0.0,
"step": 7000
},
{
"epoch": 3.51,
"grad_norm": 0.0,
"learning_rate": 4.478026006598886e-06,
"loss": 0.0,
"step": 7020
},
{
"epoch": 3.52,
"grad_norm": 0.0,
"learning_rate": 4.4230063854281815e-06,
"loss": 0.0,
"step": 7040
},
{
"epoch": 3.5300000000000002,
"grad_norm": 0.0,
"learning_rate": 4.368230720025137e-06,
"loss": 0.0,
"step": 7060
},
{
"epoch": 3.54,
"grad_norm": 0.0,
"learning_rate": 4.313701406454797e-06,
"loss": 0.0,
"step": 7080
},
{
"epoch": 3.55,
"grad_norm": 0.0,
"learning_rate": 4.259420830005995e-06,
"loss": 0.0,
"step": 7100
},
{
"epoch": 3.56,
"grad_norm": 0.0,
"learning_rate": 4.205391365086981e-06,
"loss": 0.0,
"step": 7120
},
{
"epoch": 3.57,
"grad_norm": 0.0,
"learning_rate": 4.15161537512159e-06,
"loss": 0.0,
"step": 7140
},
{
"epoch": 3.58,
"grad_norm": 0.0,
"learning_rate": 4.098095212445831e-06,
"loss": 0.0,
"step": 7160
},
{
"epoch": 3.59,
"grad_norm": 0.0,
"learning_rate": 4.044833218204998e-06,
"loss": 0.0,
"step": 7180
},
{
"epoch": 3.6,
"grad_norm": 0.0,
"learning_rate": 3.991831722251268e-06,
"loss": 0.0,
"step": 7200
},
{
"epoch": 3.61,
"grad_norm": 0.0,
"learning_rate": 3.939093043041769e-06,
"loss": 0.0,
"step": 7220
},
{
"epoch": 3.62,
"grad_norm": 0.0,
"learning_rate": 3.886619487537188e-06,
"loss": 0.0,
"step": 7240
},
{
"epoch": 3.63,
"grad_norm": 0.0,
"learning_rate": 3.834413351100823e-06,
"loss": 0.0,
"step": 7260
},
{
"epoch": 3.64,
"grad_norm": 0.0,
"learning_rate": 3.7824769173982133e-06,
"loss": 0.0,
"step": 7280
},
{
"epoch": 3.65,
"grad_norm": 0.0,
"learning_rate": 3.7308124582972218e-06,
"loss": 0.0,
"step": 7300
},
{
"epoch": 3.66,
"grad_norm": 0.0,
"learning_rate": 3.6794222337686514e-06,
"loss": 0.0,
"step": 7320
},
{
"epoch": 3.67,
"grad_norm": 0.0,
"learning_rate": 3.628308491787411e-06,
"loss": 0.0,
"step": 7340
},
{
"epoch": 3.68,
"grad_norm": 0.0,
"learning_rate": 3.5774734682341563e-06,
"loss": 0.0,
"step": 7360
},
{
"epoch": 3.69,
"grad_norm": 0.0,
"learning_rate": 3.526919386797504e-06,
"loss": 0.0,
"step": 7380
},
{
"epoch": 3.7,
"grad_norm": 0.0,
"learning_rate": 3.4766484588767436e-06,
"loss": 0.0,
"step": 7400
},
{
"epoch": 3.71,
"grad_norm": 0.0,
"learning_rate": 3.426662883485119e-06,
"loss": 0.0,
"step": 7420
},
{
"epoch": 3.7199999999999998,
"grad_norm": 0.0,
"learning_rate": 3.376964847153634e-06,
"loss": 0.0,
"step": 7440
},
{
"epoch": 3.73,
"grad_norm": 0.0,
"learning_rate": 3.32755652383539e-06,
"loss": 0.0,
"step": 7460
},
{
"epoch": 3.74,
"grad_norm": 0.0,
"learning_rate": 3.2784400748105162e-06,
"loss": 0.0,
"step": 7480
},
{
"epoch": 3.75,
"grad_norm": 0.0,
"learning_rate": 3.2296176485916043e-06,
"loss": 0.0,
"step": 7500
},
{
"epoch": 3.76,
"grad_norm": 1.206122979269253e-09,
"learning_rate": 3.1810913808297374e-06,
"loss": -0.0393,
"step": 7520
},
{
"epoch": 3.77,
"grad_norm": 6.012237818708854e-09,
"learning_rate": 3.132863394221076e-06,
"loss": 0.0,
"step": 7540
},
{
"epoch": 3.7800000000000002,
"grad_norm": 3.776818413570027e-09,
"learning_rate": 3.0849357984139826e-06,
"loss": 0.0,
"step": 7560
},
{
"epoch": 3.79,
"grad_norm": 5.78899381764586e-08,
"learning_rate": 3.0373106899167758e-06,
"loss": 0.0,
"step": 7580
},
{
"epoch": 3.8,
"grad_norm": 4.312326495892194e-09,
"learning_rate": 2.989990152005976e-06,
"loss": 0.0,
"step": 7600
},
{
"epoch": 3.81,
"grad_norm": 5.338903763529856e-10,
"learning_rate": 2.942976254635207e-06,
"loss": 0.0,
"step": 7620
},
{
"epoch": 3.82,
"grad_norm": 1.336624604400538e-12,
"learning_rate": 2.8962710543446504e-06,
"loss": 0.0,
"step": 7640
},
{
"epoch": 3.83,
"grad_norm": 1.2579886021768516e-09,
"learning_rate": 2.849876594171064e-06,
"loss": 0.0,
"step": 7660
},
{
"epoch": 3.84,
"grad_norm": 0.0,
"learning_rate": 2.803794903558439e-06,
"loss": 0.0,
"step": 7680
},
{
"epoch": 3.85,
"grad_norm": 0.0,
"learning_rate": 2.7580279982692017e-06,
"loss": 0.0,
"step": 7700
},
{
"epoch": 3.86,
"grad_norm": 0.0,
"learning_rate": 2.7125778802960557e-06,
"loss": 0.0,
"step": 7720
},
{
"epoch": 3.87,
"grad_norm": 0.0,
"learning_rate": 2.667446537774402e-06,
"loss": 0.0,
"step": 7740
},
{
"epoch": 3.88,
"grad_norm": 0.0,
"learning_rate": 2.622635944895362e-06,
"loss": 0.0,
"step": 7760
},
{
"epoch": 3.89,
"grad_norm": 0.0,
"learning_rate": 2.578148061819441e-06,
"loss": 0.0,
"step": 7780
},
{
"epoch": 3.9,
"grad_norm": 0.0,
"learning_rate": 2.533984834590758e-06,
"loss": 0.0,
"step": 7800
},
{
"epoch": 3.91,
"grad_norm": 0.0,
"learning_rate": 2.490148195051949e-06,
"loss": 0.0,
"step": 7820
},
{
"epoch": 3.92,
"grad_norm": 0.0,
"learning_rate": 2.446640060759632e-06,
"loss": 0.0,
"step": 7840
},
{
"epoch": 3.93,
"grad_norm": 0.0,
"learning_rate": 2.4034623349005492e-06,
"loss": 0.0,
"step": 7860
},
{
"epoch": 3.94,
"grad_norm": 2.392158648945042e-08,
"learning_rate": 2.360616906208311e-06,
"loss": -0.0357,
"step": 7880
},
{
"epoch": 3.95,
"grad_norm": 4.052552737832116e-10,
"learning_rate": 2.3181056488807606e-06,
"loss": 0.0,
"step": 7900
},
{
"epoch": 3.96,
"grad_norm": 1.4914207369098875e-10,
"learning_rate": 2.2759304224980174e-06,
"loss": 0.0,
"step": 7920
},
{
"epoch": 3.9699999999999998,
"grad_norm": 1.1213100670204312e-08,
"learning_rate": 2.234093071941108e-06,
"loss": 0.0,
"step": 7940
},
{
"epoch": 3.98,
"grad_norm": 6.406816483589539e-12,
"learning_rate": 2.1925954273112838e-06,
"loss": 0.0,
"step": 7960
},
{
"epoch": 3.99,
"grad_norm": 8.113453485447675e-13,
"learning_rate": 2.151439303849949e-06,
"loss": 0.0,
"step": 7980
},
{
"epoch": 4.0,
"grad_norm": 1.0131413936964595e-12,
"learning_rate": 2.110626501859275e-06,
"loss": 0.0,
"step": 8000
},
{
"epoch": 4.01,
"grad_norm": 6.270884506109908e-11,
"learning_rate": 2.070158806623438e-06,
"loss": 0.0,
"step": 8020
},
{
"epoch": 4.02,
"grad_norm": 0.0,
"learning_rate": 2.03003798833052e-06,
"loss": 0.0,
"step": 8040
},
{
"epoch": 4.03,
"grad_norm": 0.0,
"learning_rate": 1.9902658019950915e-06,
"loss": 0.0,
"step": 8060
},
{
"epoch": 4.04,
"grad_norm": 0.0,
"learning_rate": 1.950843987381421e-06,
"loss": 0.0,
"step": 8080
},
{
"epoch": 4.05,
"grad_norm": 0.0,
"learning_rate": 1.9117742689273942e-06,
"loss": 0.0,
"step": 8100
},
{
"epoch": 4.06,
"grad_norm": 0.0,
"learning_rate": 1.8730583556690607e-06,
"loss": 0.0,
"step": 8120
},
{
"epoch": 4.07,
"grad_norm": 0.0,
"learning_rate": 1.8346979411658893e-06,
"loss": 0.0,
"step": 8140
},
{
"epoch": 4.08,
"grad_norm": 0.0,
"learning_rate": 1.7966947034266857e-06,
"loss": 0.0,
"step": 8160
},
{
"epoch": 4.09,
"grad_norm": 0.0,
"learning_rate": 1.759050304836174e-06,
"loss": 0.0,
"step": 8180
},
{
"epoch": 4.1,
"grad_norm": 0.0,
"learning_rate": 1.7217663920823069e-06,
"loss": 0.0,
"step": 8200
},
{
"epoch": 4.11,
"grad_norm": 0.0,
"learning_rate": 1.6848445960842075e-06,
"loss": 0.0,
"step": 8220
},
{
"epoch": 4.12,
"grad_norm": 0.0,
"learning_rate": 1.6482865319208408e-06,
"loss": 0.0,
"step": 8240
},
{
"epoch": 4.13,
"grad_norm": 0.0,
"learning_rate": 1.612093798760368e-06,
"loss": 0.0,
"step": 8260
},
{
"epoch": 4.14,
"grad_norm": 0.0,
"learning_rate": 1.5762679797901848e-06,
"loss": 0.0,
"step": 8280
},
{
"epoch": 4.15,
"grad_norm": 0.0,
"learning_rate": 1.5408106421476753e-06,
"loss": 0.0,
"step": 8300
},
{
"epoch": 4.16,
"grad_norm": 0.0,
"learning_rate": 1.5057233368516522e-06,
"loss": 0.0,
"step": 8320
},
{
"epoch": 4.17,
"grad_norm": 0.0,
"learning_rate": 1.4710075987345129e-06,
"loss": 0.0,
"step": 8340
},
{
"epoch": 4.18,
"grad_norm": 0.0,
"learning_rate": 1.4366649463751103e-06,
"loss": 0.0,
"step": 8360
},
{
"epoch": 4.19,
"grad_norm": 0.0,
"learning_rate": 1.4026968820323105e-06,
"loss": 0.0,
"step": 8380
},
{
"epoch": 4.2,
"grad_norm": 0.0,
"learning_rate": 1.3691048915792892e-06,
"loss": 0.0,
"step": 8400
},
{
"epoch": 4.21,
"grad_norm": 0.0,
"learning_rate": 1.3358904444385368e-06,
"loss": 0.0,
"step": 8420
},
{
"epoch": 4.22,
"grad_norm": 0.0,
"learning_rate": 1.303054993517564e-06,
"loss": 0.0,
"step": 8440
},
{
"epoch": 4.23,
"grad_norm": 0.0,
"learning_rate": 1.2705999751453712e-06,
"loss": 0.0,
"step": 8460
},
{
"epoch": 4.24,
"grad_norm": 0.0,
"learning_rate": 1.2385268090095992e-06,
"loss": 0.0,
"step": 8480
},
{
"epoch": 4.25,
"grad_norm": 0.0,
"learning_rate": 1.206836898094439e-06,
"loss": 0.0,
"step": 8500
},
{
"epoch": 4.26,
"grad_norm": 0.0,
"learning_rate": 1.175531628619253e-06,
"loss": 0.0,
"step": 8520
},
{
"epoch": 4.27,
"grad_norm": 0.0,
"learning_rate": 1.1446123699779433e-06,
"loss": 0.0,
"step": 8540
},
{
"epoch": 4.28,
"grad_norm": 0.0,
"learning_rate": 1.1140804746790512e-06,
"loss": 0.0,
"step": 8560
},
{
"epoch": 4.29,
"grad_norm": 0.0,
"learning_rate": 1.083937278286582e-06,
"loss": 0.0,
"step": 8580
},
{
"epoch": 4.3,
"grad_norm": 0.0,
"learning_rate": 1.0541840993616003e-06,
"loss": 0.0,
"step": 8600
},
{
"epoch": 4.31,
"grad_norm": 0.0,
"learning_rate": 1.024822239404535e-06,
"loss": 0.0,
"step": 8620
},
{
"epoch": 4.32,
"grad_norm": 0.0,
"learning_rate": 9.958529827982644e-07,
"loss": 0.0,
"step": 8640
},
{
"epoch": 4.33,
"grad_norm": 0.0,
"learning_rate": 9.672775967519144e-07,
"loss": 0.0,
"step": 8660
},
{
"epoch": 4.34,
"grad_norm": 0.0,
"learning_rate": 9.390973312454465e-07,
"loss": 0.0,
"step": 8680
},
{
"epoch": 4.35,
"grad_norm": 0.0,
"learning_rate": 9.11313418974965e-07,
"loss": 0.0,
"step": 8700
},
{
"epoch": 4.36,
"grad_norm": 0.0,
"learning_rate": 8.839270752987972e-07,
"loss": 0.0,
"step": 8720
},
{
"epoch": 4.37,
"grad_norm": 0.0,
"learning_rate": 8.569394981843382e-07,
"loss": 0.0,
"step": 8740
},
{
"epoch": 4.38,
"grad_norm": 0.0,
"learning_rate": 8.303518681556355e-07,
"loss": 0.0,
"step": 8760
},
{
"epoch": 4.39,
"grad_norm": 0.0,
"learning_rate": 8.041653482417622e-07,
"loss": 0.0,
"step": 8780
},
{
"epoch": 4.4,
"grad_norm": 0.0,
"learning_rate": 7.783810839259287e-07,
"loss": 0.0,
"step": 8800
},
{
"epoch": 4.41,
"grad_norm": 0.0,
"learning_rate": 7.530002030953854e-07,
"loss": 0.0,
"step": 8820
},
{
"epoch": 4.42,
"grad_norm": 0.0,
"learning_rate": 7.28023815992086e-07,
"loss": 0.0,
"step": 8840
},
{
"epoch": 4.43,
"grad_norm": 0.0,
"learning_rate": 7.034530151641117e-07,
"loss": 0.0,
"step": 8860
},
{
"epoch": 4.44,
"grad_norm": 0.0,
"learning_rate": 6.792888754178906e-07,
"loss": 0.0,
"step": 8880
},
{
"epoch": 4.45,
"grad_norm": 0.0,
"learning_rate": 6.555324537711749e-07,
"loss": 0.0,
"step": 8900
},
{
"epoch": 4.46,
"grad_norm": 0.0,
"learning_rate": 6.321847894068089e-07,
"loss": 0.0,
"step": 8920
},
{
"epoch": 4.47,
"grad_norm": 0.0,
"learning_rate": 6.092469036272664e-07,
"loss": 0.0,
"step": 8940
},
{
"epoch": 4.48,
"grad_norm": 0.0,
"learning_rate": 5.867197998099783e-07,
"loss": 0.0,
"step": 8960
},
{
"epoch": 4.49,
"grad_norm": 0.0,
"learning_rate": 5.646044633634484e-07,
"loss": 0.0,
"step": 8980
},
{
"epoch": 4.5,
"grad_norm": 0.0,
"learning_rate": 5.42901861684132e-07,
"loss": 0.0,
"step": 9000
},
{
"epoch": 4.51,
"grad_norm": 0.0,
"learning_rate": 5.216129441141371e-07,
"loss": 0.0,
"step": 9020
},
{
"epoch": 4.52,
"grad_norm": 0.0,
"learning_rate": 5.007386418996818e-07,
"loss": 0.0,
"step": 9040
},
{
"epoch": 4.53,
"grad_norm": 0.0,
"learning_rate": 4.802798681503673e-07,
"loss": 0.0,
"step": 9060
},
{
"epoch": 4.54,
"grad_norm": 0.0,
"learning_rate": 4.60237517799238e-07,
"loss": 0.0,
"step": 9080
},
{
"epoch": 4.55,
"grad_norm": 0.0,
"learning_rate": 4.4061246756362364e-07,
"loss": 0.0,
"step": 9100
},
{
"epoch": 4.5600000000000005,
"grad_norm": 0.0,
"learning_rate": 4.2140557590680034e-07,
"loss": 0.0,
"step": 9120
},
{
"epoch": 4.57,
"grad_norm": 0.0,
"learning_rate": 4.02617683000428e-07,
"loss": 0.0,
"step": 9140
},
{
"epoch": 4.58,
"grad_norm": 0.0,
"learning_rate": 3.842496106878102e-07,
"loss": 0.0,
"step": 9160
},
{
"epoch": 4.59,
"grad_norm": 0.0,
"learning_rate": 3.6630216244793236e-07,
"loss": 0.0,
"step": 9180
},
{
"epoch": 4.6,
"grad_norm": 0.0,
"learning_rate": 3.487761233603204e-07,
"loss": 0.0,
"step": 9200
},
{
"epoch": 4.61,
"grad_norm": 0.0,
"learning_rate": 3.3167226007070454e-07,
"loss": 0.0,
"step": 9220
},
{
"epoch": 4.62,
"grad_norm": 0.0,
"learning_rate": 3.149913207574695e-07,
"loss": 0.0,
"step": 9240
},
{
"epoch": 4.63,
"grad_norm": 0.0,
"learning_rate": 2.987340350989421e-07,
"loss": 0.0,
"step": 9260
},
{
"epoch": 4.64,
"grad_norm": 0.0,
"learning_rate": 2.829011142414628e-07,
"loss": 0.0,
"step": 9280
},
{
"epoch": 4.65,
"grad_norm": 0.0,
"learning_rate": 2.674932507682815e-07,
"loss": 0.0,
"step": 9300
},
{
"epoch": 4.66,
"grad_norm": 0.0,
"learning_rate": 2.5251111866926303e-07,
"loss": 0.0,
"step": 9320
},
{
"epoch": 4.67,
"grad_norm": 0.0,
"learning_rate": 2.3795537331139596e-07,
"loss": 0.0,
"step": 9340
},
{
"epoch": 4.68,
"grad_norm": 0.0,
"learning_rate": 2.238266514101417e-07,
"loss": 0.0,
"step": 9360
},
{
"epoch": 4.6899999999999995,
"grad_norm": 0.0,
"learning_rate": 2.101255710015626e-07,
"loss": 0.0,
"step": 9380
},
{
"epoch": 4.7,
"grad_norm": 0.0,
"learning_rate": 1.9685273141530348e-07,
"loss": 0.0,
"step": 9400
},
{
"epoch": 4.71,
"grad_norm": 0.0,
"learning_rate": 1.84008713248367e-07,
"loss": 0.0,
"step": 9420
},
{
"epoch": 4.72,
"grad_norm": 0.0,
"learning_rate": 1.7159407833971298e-07,
"loss": 0.0,
"step": 9440
},
{
"epoch": 4.73,
"grad_norm": 0.0,
"learning_rate": 1.5960936974569353e-07,
"loss": 0.0,
"step": 9460
},
{
"epoch": 4.74,
"grad_norm": 0.0,
"learning_rate": 1.4805511171628538e-07,
"loss": 0.0,
"step": 9480
},
{
"epoch": 4.75,
"grad_norm": 0.0,
"learning_rate": 1.3693180967216614e-07,
"loss": 0.0,
"step": 9500
},
{
"epoch": 4.76,
"grad_norm": 0.0,
"learning_rate": 1.2623995018260283e-07,
"loss": 0.0,
"step": 9520
},
{
"epoch": 4.77,
"grad_norm": 0.0,
"learning_rate": 1.1598000094416362e-07,
"loss": 0.0,
"step": 9540
},
{
"epoch": 4.78,
"grad_norm": 0.0,
"learning_rate": 1.061524107602696e-07,
"loss": 0.0,
"step": 9560
},
{
"epoch": 4.79,
"grad_norm": 0.0,
"learning_rate": 9.675760952154833e-08,
"loss": 0.0,
"step": 9580
},
{
"epoch": 4.8,
"grad_norm": 0.0,
"learning_rate": 8.779600818704437e-08,
"loss": 0.0,
"step": 9600
},
{
"epoch": 4.8100000000000005,
"grad_norm": 0.0,
"learning_rate": 7.926799876622926e-08,
"loss": 0.0,
"step": 9620
},
{
"epoch": 4.82,
"grad_norm": 0.0,
"learning_rate": 7.117395430186414e-08,
"loss": 0.0,
"step": 9640
},
{
"epoch": 4.83,
"grad_norm": 0.0,
"learning_rate": 6.351422885367276e-08,
"loss": 0.0,
"step": 9660
},
{
"epoch": 4.84,
"grad_norm": 0.0,
"learning_rate": 5.628915748286057e-08,
"loss": 0.0,
"step": 9680
},
{
"epoch": 4.85,
"grad_norm": 0.0,
"learning_rate": 4.9499056237457544e-08,
"loss": 0.0,
"step": 9700
},
{
"epoch": 4.86,
"grad_norm": 0.0,
"learning_rate": 4.31442221384859e-08,
"loss": 0.0,
"step": 9720
},
{
"epoch": 4.87,
"grad_norm": 0.0,
"learning_rate": 3.7224933166979396e-08,
"loss": 0.0,
"step": 9740
},
{
"epoch": 4.88,
"grad_norm": 0.0,
"learning_rate": 3.174144825180969e-08,
"loss": 0.0,
"step": 9760
},
{
"epoch": 4.89,
"grad_norm": 0.0,
"learning_rate": 2.6694007258374344e-08,
"loss": 0.0,
"step": 9780
},
{
"epoch": 4.9,
"grad_norm": 0.0,
"learning_rate": 2.208283097809183e-08,
"loss": 0.0,
"step": 9800
},
{
"epoch": 4.91,
"grad_norm": 0.0,
"learning_rate": 1.790812111875151e-08,
"loss": 0.0,
"step": 9820
},
{
"epoch": 4.92,
"grad_norm": 0.0,
"learning_rate": 1.4170060295687348e-08,
"loss": 0.0,
"step": 9840
},
{
"epoch": 4.93,
"grad_norm": 0.0,
"learning_rate": 1.0868812023787646e-08,
"loss": 0.0,
"step": 9860
},
{
"epoch": 4.9399999999999995,
"grad_norm": 0.0,
"learning_rate": 8.004520710347408e-09,
"loss": 0.0,
"step": 9880
},
{
"epoch": 4.95,
"grad_norm": 0.0,
"learning_rate": 5.577311648748973e-09,
"loss": 0.0,
"step": 9900
},
{
"epoch": 4.96,
"grad_norm": 0.0,
"learning_rate": 3.587291012977501e-09,
"loss": 0.0,
"step": 9920
},
{
"epoch": 4.97,
"grad_norm": 0.0,
"learning_rate": 2.0345458529835715e-09,
"loss": 0.0,
"step": 9940
},
{
"epoch": 4.98,
"grad_norm": 0.0,
"learning_rate": 9.191440908706828e-10,
"loss": 0.0,
"step": 9960
},
{
"epoch": 4.99,
"grad_norm": 0.0,
"learning_rate": 2.41134517925401e-10,
"loss": 0.0,
"step": 9980
},
{
"epoch": 5.0,
"grad_norm": 0.0,
"learning_rate": 5.467924824031912e-13,
"loss": 0.0,
"step": 10000
}
],
"logging_steps": 20,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.42298541948928e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
Free AI Image Generator No sign-up. Instant results. Open Now