Instructions to use qbao775/Fusion-Conflict-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use qbao775/Fusion-Conflict-8B with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("qbao775/Fusion-Conflict-8B", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.766525149345398, | |
| "learning_rate": 7.6e-07, | |
| "loss": -0.0699, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 1.2742258310317993, | |
| "learning_rate": 1.56e-06, | |
| "loss": -0.0817, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 3.1112349033355713, | |
| "learning_rate": 2.3600000000000003e-06, | |
| "loss": -0.0788, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 39.28089141845703, | |
| "learning_rate": 3.1600000000000002e-06, | |
| "loss": -0.1878, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.4309356212615967, | |
| "learning_rate": 3.96e-06, | |
| "loss": -0.0402, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 8.633565902709961, | |
| "learning_rate": 4.76e-06, | |
| "loss": -0.0801, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 2.7842857837677, | |
| "learning_rate": 5.560000000000001e-06, | |
| "loss": -0.098, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.09758752584457397, | |
| "learning_rate": 6.360000000000001e-06, | |
| "loss": -0.1227, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 4.8647260665893555, | |
| "learning_rate": 7.16e-06, | |
| "loss": -0.0446, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 4.039462089538574, | |
| "learning_rate": 7.960000000000002e-06, | |
| "loss": -0.1432, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.05541618913412094, | |
| "learning_rate": 8.76e-06, | |
| "loss": -0.1669, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 32.180450439453125, | |
| "learning_rate": 9.56e-06, | |
| "loss": -0.072, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.8857631683349609, | |
| "learning_rate": 1.036e-05, | |
| "loss": -0.0392, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.09615501761436462, | |
| "learning_rate": 1.1160000000000002e-05, | |
| "loss": -0.035, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 4.058862209320068, | |
| "learning_rate": 1.196e-05, | |
| "loss": -0.0466, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.292805939912796, | |
| "learning_rate": 1.2760000000000001e-05, | |
| "loss": -0.0186, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.002572560915723443, | |
| "learning_rate": 1.3560000000000002e-05, | |
| "loss": -0.0461, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.032721325755119324, | |
| "learning_rate": 1.4360000000000001e-05, | |
| "loss": -0.0244, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.014479526318609715, | |
| "learning_rate": 1.516e-05, | |
| "loss": -0.0315, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.0007994524785317481, | |
| "learning_rate": 1.5960000000000003e-05, | |
| "loss": -0.0677, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.0004835372092202306, | |
| "learning_rate": 1.6760000000000002e-05, | |
| "loss": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.9216458895243704e-05, | |
| "learning_rate": 1.756e-05, | |
| "loss": -0.0365, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.3274141565489117e-05, | |
| "learning_rate": 1.8360000000000004e-05, | |
| "loss": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.9133960904582636e-06, | |
| "learning_rate": 1.916e-05, | |
| "loss": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.000470530241727829, | |
| "learning_rate": 1.9960000000000002e-05, | |
| "loss": -0.019, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 2.208295882155653e-06, | |
| "learning_rate": 1.999980260856137e-05, | |
| "loss": -0.0024, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.001379099558107555, | |
| "learning_rate": 1.999916834015426e-05, | |
| "loss": -0.0135, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.0030259762424975634, | |
| "learning_rate": 1.9998096675731135e-05, | |
| "loss": -0.001, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.0003752721822820604, | |
| "learning_rate": 1.9996587662170075e-05, | |
| "loss": -0.0611, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.0004474676970858127, | |
| "learning_rate": 1.9994641365480218e-05, | |
| "loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.0006332076736725867, | |
| "learning_rate": 1.999225787079888e-05, | |
| "loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.00026314359274692833, | |
| "learning_rate": 1.9989437282387855e-05, | |
| "loss": -0.0392, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.008587045595049858, | |
| "learning_rate": 1.9986179723628806e-05, | |
| "loss": -0.0055, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 9.715045598568395e-05, | |
| "learning_rate": 1.998248533701791e-05, | |
| "loss": 0.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.0002863932168111205, | |
| "learning_rate": 1.9978354284159604e-05, | |
| "loss": -0.0022, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.0512692824704573e-05, | |
| "learning_rate": 1.9973786745759523e-05, | |
| "loss": 0.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 51.87382507324219, | |
| "learning_rate": 1.9968782921616595e-05, | |
| "loss": -0.0265, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.0003187332767993212, | |
| "learning_rate": 1.99633430306143e-05, | |
| "loss": -0.0183, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.00012655163300223649, | |
| "learning_rate": 1.995746731071109e-05, | |
| "loss": 0.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.217572389577981e-06, | |
| "learning_rate": 1.9951156018929986e-05, | |
| "loss": 0.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 2.5256804292439483e-07, | |
| "learning_rate": 1.994440943134734e-05, | |
| "loss": 0.0, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 4.53083657703246e-06, | |
| "learning_rate": 1.9937227843080746e-05, | |
| "loss": 0.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 8.866226153259049e-08, | |
| "learning_rate": 1.9929611568276146e-05, | |
| "loss": 0.0, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 7.812383984173721e-08, | |
| "learning_rate": 1.9921560940094068e-05, | |
| "loss": 0.0, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.4238279444356294e-09, | |
| "learning_rate": 1.9913076310695068e-05, | |
| "loss": 0.0, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.6094839350166978e-10, | |
| "learning_rate": 1.9904158051224324e-05, | |
| "loss": 0.0, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.98948065517954e-05, | |
| "loss": 0.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.988502222147317e-05, | |
| "loss": 0.0, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9874805488255942e-05, | |
| "loss": 0.0, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9864156799056725e-05, | |
| "loss": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.985307661968368e-05, | |
| "loss": 0.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9841565434819746e-05, | |
| "loss": 0.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9829623748001447e-05, | |
| "loss": 0.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.00012120811879867688, | |
| "learning_rate": 1.981725208159684e-05, | |
| "loss": -0.0181, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 7.75166608946165e-06, | |
| "learning_rate": 1.98044509767827e-05, | |
| "loss": 0.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 9.087637408811133e-06, | |
| "learning_rate": 1.979122099352082e-05, | |
| "loss": 0.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 8.575042556913104e-06, | |
| "learning_rate": 1.9777562710533528e-05, | |
| "loss": 0.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 4.427300154929981e-06, | |
| "learning_rate": 1.9763476725278364e-05, | |
| "loss": 0.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.748534289054305e-09, | |
| "learning_rate": 1.9748963653921957e-05, | |
| "loss": 0.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.178993327106582e-06, | |
| "learning_rate": 1.9734024131313068e-05, | |
| "loss": 0.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.0376846008002758, | |
| "learning_rate": 1.971865881095481e-05, | |
| "loss": -0.047, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.79782761936076e-05, | |
| "learning_rate": 1.9702868364976068e-05, | |
| "loss": -0.0117, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 3.6909668210682867e-07, | |
| "learning_rate": 1.968665348410211e-05, | |
| "loss": 0.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.4639082766952924e-05, | |
| "learning_rate": 1.9670014877624353e-05, | |
| "loss": -0.014, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.4362817637447733e-06, | |
| "learning_rate": 1.9652953273369344e-05, | |
| "loss": 0.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.5066849048016593e-05, | |
| "learning_rate": 1.9635469417666922e-05, | |
| "loss": -0.0035, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.0002359877253184095, | |
| "learning_rate": 1.9617564075317585e-05, | |
| "loss": 0.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.3228980151325231e-06, | |
| "learning_rate": 1.9599238029559005e-05, | |
| "loss": 0.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 8.186302125068323e-08, | |
| "learning_rate": 1.958049208203179e-05, | |
| "loss": 0.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 7.560096992875742e-10, | |
| "learning_rate": 1.9561327052744422e-05, | |
| "loss": 0.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 5.596998420287491e-10, | |
| "learning_rate": 1.9541743780037358e-05, | |
| "loss": 0.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.471132327068858e-09, | |
| "learning_rate": 1.9521743120546396e-05, | |
| "loss": 0.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 4.28950278075213e-11, | |
| "learning_rate": 1.950132594916517e-05, | |
| "loss": 0.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9480493159006897e-05, | |
| "loss": 0.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9459245661365312e-05, | |
| "loss": 0.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9437584385674793e-05, | |
| "loss": 0.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.941551027946971e-05, | |
| "loss": 0.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9393024308342982e-05, | |
| "loss": 0.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9370127455903825e-05, | |
| "loss": 0.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9346820723734746e-05, | |
| "loss": 0.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.932310513134771e-05, | |
| "loss": 0.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9298981716139554e-05, | |
| "loss": 0.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9274451533346617e-05, | |
| "loss": 0.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9249515655998547e-05, | |
| "loss": 0.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9224175174871415e-05, | |
| "loss": 0.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9198431198439945e-05, | |
| "loss": 0.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9172284852829074e-05, | |
| "loss": 0.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.914573728176466e-05, | |
| "loss": 0.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9118789646523458e-05, | |
| "loss": 0.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9091443125882336e-05, | |
| "loss": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9063698916066698e-05, | |
| "loss": 0.0, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 18.19425392150879, | |
| "learning_rate": 1.9035558230698154e-05, | |
| "loss": -0.021, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 0.0011678176233544946, | |
| "learning_rate": 1.9007022300741457e-05, | |
| "loss": -0.0368, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.425844669342041, | |
| "learning_rate": 1.897809237445061e-05, | |
| "loss": -0.0002, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 0.12504132091999054, | |
| "learning_rate": 1.894876971731433e-05, | |
| "loss": -0.0015, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 3.9395573026013153e-07, | |
| "learning_rate": 1.8919055612000613e-05, | |
| "loss": 0.0, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.2863043821198517e-06, | |
| "learning_rate": 1.8888951358300694e-05, | |
| "loss": 0.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 3.974708306486718e-06, | |
| "learning_rate": 1.8858458273072158e-05, | |
| "loss": -0.0137, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 3.261742676841095e-05, | |
| "learning_rate": 1.882757769018133e-05, | |
| "loss": 0.0, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.98726092751167e-08, | |
| "learning_rate": 1.879631096044495e-05, | |
| "loss": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 3.763807967516186e-07, | |
| "learning_rate": 1.876465945157106e-05, | |
| "loss": 0.0, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 2.658192777005297e-09, | |
| "learning_rate": 1.8732624548099204e-05, | |
| "loss": 0.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.2280678696185987e-09, | |
| "learning_rate": 1.8700207651339832e-05, | |
| "loss": 0.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 2.058490267131674e-09, | |
| "learning_rate": 1.8667410179313027e-05, | |
| "loss": 0.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.4351135731172082e-11, | |
| "learning_rate": 1.863423356668646e-05, | |
| "loss": 0.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8600679264712652e-05, | |
| "loss": 0.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8566748741165465e-05, | |
| "loss": 0.0, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8532443480275924e-05, | |
| "loss": 0.0, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8497764982667275e-05, | |
| "loss": 0.0, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8462714765289342e-05, | |
| "loss": 0.0, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8427294361352176e-05, | |
| "loss": 0.0, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8391505320259e-05, | |
| "loss": 0.0, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8355349207538408e-05, | |
| "loss": 0.0, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1400000000000001, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8318827604775902e-05, | |
| "loss": 0.0, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8281942109544698e-05, | |
| "loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8244694335335853e-05, | |
| "loss": 0.0, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.820708591148767e-05, | |
| "loss": 0.0, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.816911848311445e-05, | |
| "loss": 0.0, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8130793711034506e-05, | |
| "loss": 0.0, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8092113271697522e-05, | |
| "loss": 0.0, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8053078857111218e-05, | |
| "loss": 0.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.801369217476735e-05, | |
| "loss": 0.0, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7973954947566995e-05, | |
| "loss": 0.0, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7933868913745205e-05, | |
| "loss": 0.0, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.789343582679495e-05, | |
| "loss": 0.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.785265745539045e-05, | |
| "loss": 0.0, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.781153558330977e-05, | |
| "loss": 0.0, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7770072009356805e-05, | |
| "loss": 0.0, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.772826854728261e-05, | |
| "loss": 0.0, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7686127025706038e-05, | |
| "loss": 0.0, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7643649288033766e-05, | |
| "loss": 0.0, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.760083719237964e-05, | |
| "loss": 0.0, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.755769261148343e-05, | |
| "loss": 0.0, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7514217432628856e-05, | |
| "loss": 0.0, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7470413557561098e-05, | |
| "loss": 0.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 6.09237849857891e-06, | |
| "learning_rate": 1.7426282902403545e-05, | |
| "loss": -0.0595, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 6.212492280610604e-06, | |
| "learning_rate": 1.7381827397574017e-05, | |
| "loss": 0.0, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.1541774256329518e-06, | |
| "learning_rate": 1.733704898770032e-05, | |
| "loss": 0.0, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.3900000000000001, | |
| "grad_norm": 6.421708462767128e-07, | |
| "learning_rate": 1.7291949631535155e-05, | |
| "loss": 0.0, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 3.152386085503167e-08, | |
| "learning_rate": 1.7246531301870467e-05, | |
| "loss": 0.0, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 8.747147717258485e-08, | |
| "learning_rate": 1.720079598545113e-05, | |
| "loss": 0.0, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 3.257514258336869e-09, | |
| "learning_rate": 1.7154745682888045e-05, | |
| "loss": 0.0, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 0.06368578225374222, | |
| "learning_rate": 1.7108382408570626e-05, | |
| "loss": -0.0118, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 38.9387321472168, | |
| "learning_rate": 1.7061708190578695e-05, | |
| "loss": -0.01, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 3.736419103006483e-07, | |
| "learning_rate": 1.7014725070593742e-05, | |
| "loss": 0.0, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 7.977614018273016e-07, | |
| "learning_rate": 1.6967435103809646e-05, | |
| "loss": -0.0327, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.0230847919956432e-06, | |
| "learning_rate": 1.691984035884275e-05, | |
| "loss": 0.0, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 18.049253463745117, | |
| "learning_rate": 1.6871942917641385e-05, | |
| "loss": -0.0016, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 0.00011181381705682725, | |
| "learning_rate": 1.6823744875394788e-05, | |
| "loss": -0.0062, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 85.54637145996094, | |
| "learning_rate": 1.677524834044148e-05, | |
| "loss": -0.051, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 2.2363690277416026e-07, | |
| "learning_rate": 1.6726455434177e-05, | |
| "loss": 0.0, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 5.164109850852583e-08, | |
| "learning_rate": 1.6677368290961133e-05, | |
| "loss": 0.0, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.7374658156654732e-08, | |
| "learning_rate": 1.6627989058024546e-05, | |
| "loss": 0.0, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 1.4617263996430374e-11, | |
| "learning_rate": 1.6578319895374854e-05, | |
| "loss": 0.0, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 5.5272342258660956e-11, | |
| "learning_rate": 1.652836297570214e-05, | |
| "loss": 0.0, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.90355758933336e-10, | |
| "learning_rate": 1.64781204842839e-05, | |
| "loss": 0.0, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.5699999999999998, | |
| "grad_norm": 1.7849938541147914e-11, | |
| "learning_rate": 1.6427594618889484e-05, | |
| "loss": 0.0, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.1557519264543403e-12, | |
| "learning_rate": 1.6376787589683914e-05, | |
| "loss": 0.0, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.5899999999999999, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6325701619131246e-05, | |
| "loss": 0.0, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6274338941897325e-05, | |
| "loss": 0.0, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.6099999999999999, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6222701804752047e-05, | |
| "loss": 0.0, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6170792466471072e-05, | |
| "loss": 0.0, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6118613197737013e-05, | |
| "loss": 0.0, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.606616628104013e-05, | |
| "loss": 0.0, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6013454010578465e-05, | |
| "loss": 0.0, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.6600000000000001, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5960478692157483e-05, | |
| "loss": 0.0, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5907242643089234e-05, | |
| "loss": 0.0, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5853748192090967e-05, | |
| "loss": 0.0, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5799997679183258e-05, | |
| "loss": 0.0, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5745993455587678e-05, | |
| "loss": 0.0, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5691737883623912e-05, | |
| "loss": 0.0, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.563723333660644e-05, | |
| "loss": 0.0, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5582482198740726e-05, | |
| "loss": 0.0, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5527486865018904e-05, | |
| "loss": 0.0, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.547224974111503e-05, | |
| "loss": 0.0, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.541677324327985e-05, | |
| "loss": 0.0, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5361059798235093e-05, | |
| "loss": 0.0, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5305111843067343e-05, | |
| "loss": 0.0, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5248931825121393e-05, | |
| "loss": 0.0, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5192522201893236e-05, | |
| "loss": 0.0, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5135885440922522e-05, | |
| "loss": 0.0, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.8199999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5079024019684668e-05, | |
| "loss": 0.0, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.502194042548243e-05, | |
| "loss": 0.0, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4964637155337156e-05, | |
| "loss": 0.0, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4907116715879511e-05, | |
| "loss": 0.0, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.8599999999999999, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.484938162323986e-05, | |
| "loss": 0.0, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 3.1729632610222325e-07, | |
| "learning_rate": 1.4791434402938191e-05, | |
| "loss": -0.004, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 2.9630384688061895e-06, | |
| "learning_rate": 1.4733277589773641e-05, | |
| "loss": 0.0, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.8900000000000001, | |
| "grad_norm": 6.645004759775475e-07, | |
| "learning_rate": 1.4674913727713623e-05, | |
| "loss": 0.0, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 8.223436452681199e-06, | |
| "learning_rate": 1.4616345369782534e-05, | |
| "loss": -0.0073, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.9100000000000001, | |
| "grad_norm": 6.144579742795031e-07, | |
| "learning_rate": 1.4557575077950085e-05, | |
| "loss": 0.0, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 6.885377956677985e-07, | |
| "learning_rate": 1.4498605423019234e-05, | |
| "loss": 0.0, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.9300000000000002, | |
| "grad_norm": 1.0231551716977094e-11, | |
| "learning_rate": 1.4439438984513716e-05, | |
| "loss": 0.0, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.5923303257545096e-12, | |
| "learning_rate": 1.438007835056523e-05, | |
| "loss": 0.0, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 3.1159091973265163e-10, | |
| "learning_rate": 1.4320526117800201e-05, | |
| "loss": 0.0, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.0668846361916451e-10, | |
| "learning_rate": 1.4260784891226217e-05, | |
| "loss": 0.0, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 7.081080860560363e-11, | |
| "learning_rate": 1.4200857284118067e-05, | |
| "loss": 0.0, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4140745917903413e-05, | |
| "loss": 0.0, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4080453422048152e-05, | |
| "loss": 0.0, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.401998243394138e-05, | |
| "loss": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3959335598780009e-05, | |
| "loss": 0.0, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3898515569453076e-05, | |
| "loss": 0.0, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3837525006425698e-05, | |
| "loss": 0.0, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3776366577622681e-05, | |
| "loss": 0.0, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3715042958311831e-05, | |
| "loss": 0.0, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.365355683098691e-05, | |
| "loss": 0.0, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3591910885250318e-05, | |
| "loss": 0.0, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3530107817695433e-05, | |
| "loss": 0.0, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3468150331788634e-05, | |
| "loss": 0.0, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3406041137751076e-05, | |
| "loss": 0.0, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3343782952440109e-05, | |
| "loss": 0.0, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3281378499230448e-05, | |
| "loss": 0.0, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3218830507895035e-05, | |
| "loss": 0.0, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3156141714485647e-05, | |
| "loss": 0.0, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3093314861213187e-05, | |
| "loss": 0.0, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3030352696327741e-05, | |
| "loss": 0.0, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.296725797399838e-05, | |
| "loss": 0.0, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2904033454192653e-05, | |
| "loss": 0.0, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2840681902555876e-05, | |
| "loss": 0.0, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2777206090290148e-05, | |
| "loss": 0.0, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.271360879403313e-05, | |
| "loss": 0.0, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2649892795736588e-05, | |
| "loss": 0.0, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2586060882544695e-05, | |
| "loss": 0.0, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2522115846672129e-05, | |
| "loss": 0.0, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2458060485281903e-05, | |
| "loss": 0.0, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2393897600363045e-05, | |
| "loss": 0.0, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2329629998607991e-05, | |
| "loss": 0.0, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2265260491289843e-05, | |
| "loss": 0.0, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.220079189413938e-05, | |
| "loss": 0.0, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2136227027221887e-05, | |
| "loss": 0.0, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2071568714813814e-05, | |
| "loss": 0.0, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2006819785279197e-05, | |
| "loss": 0.0, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1941983070945984e-05, | |
| "loss": 0.0, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.187706140798209e-05, | |
| "loss": 0.0, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1812057636271374e-05, | |
| "loss": 0.0, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1746974599289398e-05, | |
| "loss": 0.0, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1681815143979036e-05, | |
| "loss": 0.0, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1616582120625949e-05, | |
| "loss": 0.0, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1551278382733908e-05, | |
| "loss": 0.0, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.148590678689996e-05, | |
| "loss": 0.0, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1420470192689482e-05, | |
| "loss": 0.0, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.135497146251109e-05, | |
| "loss": 0.0, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1289413461491432e-05, | |
| "loss": 0.0, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1223799057349846e-05, | |
| "loss": 0.0, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1158131120272935e-05, | |
| "loss": 0.0, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1092412522788996e-05, | |
| "loss": 0.0, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.4699999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1026646139642385e-05, | |
| "loss": 0.0, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0960834847667753e-05, | |
| "loss": 0.0, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.089498152566421e-05, | |
| "loss": 0.0, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0829089054269397e-05, | |
| "loss": 0.0, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0763160315833478e-05, | |
| "loss": 0.0, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0697198194293044e-05, | |
| "loss": 0.0, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.5300000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0631205575044982e-05, | |
| "loss": 0.0, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0565185344820248e-05, | |
| "loss": 0.0, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.049914039155758e-05, | |
| "loss": 0.0, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0433073604277199e-05, | |
| "loss": 0.0, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0366987872954404e-05, | |
| "loss": 0.0, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.030088608839317e-05, | |
| "loss": 0.0, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0234771142099689e-05, | |
| "loss": 0.0, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0168645926155902e-05, | |
| "loss": 0.0, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.010251333309297e-05, | |
| "loss": 0.0, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.003637625576475e-05, | |
| "loss": 0.0, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.97023758722126e-06, | |
| "loss": 0.0, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.90410022058212e-06, | |
| "loss": 0.0, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.837967048910006e-06, | |
| "loss": 0.0, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.771840965084088e-06, | |
| "loss": 0.0, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.705724861673488e-06, | |
| "loss": 0.0, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.639621630810758e-06, | |
| "loss": 0.0, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.573534164065363e-06, | |
| "loss": 0.0, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.507465352317187e-06, | |
| "loss": 0.0, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.441418085630088e-06, | |
| "loss": 0.0, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.37539525312548e-06, | |
| "loss": 0.0, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.309399742855943e-06, | |
| "loss": 0.0, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.243434441678884e-06, | |
| "loss": 0.0, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.177502235130283e-06, | |
| "loss": 0.0, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.11160600729844e-06, | |
| "loss": 0.0, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.045748640697832e-06, | |
| "loss": 0.0, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.7800000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.979933016143022e-06, | |
| "loss": 0.0, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.91416201262265e-06, | |
| "loss": 0.0, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.848438507173475e-06, | |
| "loss": 0.0, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.782765374754536e-06, | |
| "loss": 0.0, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.717145488121397e-06, | |
| "loss": 0.0, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.651581717700483e-06, | |
| "loss": 0.0, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.586076931463504e-06, | |
| "loss": 0.0, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.520633994802014e-06, | |
| "loss": 0.0, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.45525577040208e-06, | |
| "loss": 0.0, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.389945118119023e-06, | |
| "loss": 0.0, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.324704894852354e-06, | |
| "loss": 0.0, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.25953795442079e-06, | |
| "loss": 0.0, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.19444714743741e-06, | |
| "loss": 0.0, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.129435321184985e-06, | |
| "loss": 0.0, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.064505319491398e-06, | |
| "loss": 0.0, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.999659982605272e-06, | |
| "loss": 0.0, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.934902147071708e-06, | |
| "loss": 0.0, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.870234645608222e-06, | |
| "loss": 0.0, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.80566030698082e-06, | |
| "loss": 0.0, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.9699999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.741181955880263e-06, | |
| "loss": 0.0, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.676802412798515e-06, | |
| "loss": 0.0, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.6125244939053454e-06, | |
| "loss": 0.0, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.5483510109251586e-06, | |
| "loss": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.4842847710139985e-06, | |
| "loss": 0.0, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.420328576636742e-06, | |
| "loss": 0.0, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.356485225444518e-06, | |
| "loss": 0.0, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.292757510152333e-06, | |
| "loss": 0.0, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.229148218416905e-06, | |
| "loss": 0.0, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.16566013271472e-06, | |
| "loss": 0.0, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.1022960302203115e-06, | |
| "loss": 0.0, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.039058682684805e-06, | |
| "loss": 0.0, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.975950856314636e-06, | |
| "loss": 0.0, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.9129753116505734e-06, | |
| "loss": 0.0, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.850134803446955e-06, | |
| "loss": 0.0, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.7874320805511795e-06, | |
| "loss": 0.0, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.724869885783477e-06, | |
| "loss": 0.0, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.662450955816917e-06, | |
| "loss": 0.0, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.600178021057712e-06, | |
| "loss": 0.0, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.538053805525764e-06, | |
| "loss": 0.0, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.4760810267355136e-06, | |
| "loss": 0.0, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.4142623955770656e-06, | |
| "loss": 0.0, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.352600616197615e-06, | |
| "loss": 0.0, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.291098385883146e-06, | |
| "loss": 0.0, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.22975839494045e-06, | |
| "loss": 0.0, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.168583326579456e-06, | |
| "loss": 0.0, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.1075758567958225e-06, | |
| "loss": 0.0, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.046738654253918e-06, | |
| "loss": 0.0, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.986074380170068e-06, | |
| "loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.925585688196145e-06, | |
| "loss": 0.0, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.865275224303491e-06, | |
| "loss": 0.0, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.805145626667175e-06, | |
| "loss": 0.0, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.745199525550596e-06, | |
| "loss": 0.0, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.685439543190409e-06, | |
| "loss": 0.0, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.6258682936818444e-06, | |
| "loss": 0.0, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.566488382864335e-06, | |
| "loss": 0.0, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.507302408207542e-06, | |
| "loss": 0.0, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.448312958697739e-06, | |
| "loss": 0.0, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.389522614724536e-06, | |
| "loss": 0.0, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.330933947968049e-06, | |
| "loss": 0.0, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.27254952128635e-06, | |
| "loss": 0.0, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.2143718886034086e-06, | |
| "loss": 0.0, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.1564035947973456e-06, | |
| "loss": 0.0, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.098647175589118e-06, | |
| "loss": 0.0, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.0411051574316165e-06, | |
| "loss": 0.0, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.983780057399111e-06, | |
| "loss": 0.0, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.926674383077203e-06, | |
| "loss": 0.0, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.869790632453076e-06, | |
| "loss": 0.0, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.813131293806254e-06, | |
| "loss": 0.0, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.7566988455997684e-06, | |
| "loss": 0.0, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 3.4699999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.700495756371713e-06, | |
| "loss": 0.0, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.644524484627292e-06, | |
| "loss": 0.0, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.588787478731242e-06, | |
| "loss": 0.0, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.533287176800773e-06, | |
| "loss": 0.0, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.478026006598886e-06, | |
| "loss": 0.0, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.4230063854281815e-06, | |
| "loss": 0.0, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 3.5300000000000002, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.368230720025137e-06, | |
| "loss": 0.0, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.313701406454797e-06, | |
| "loss": 0.0, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.259420830005995e-06, | |
| "loss": 0.0, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.205391365086981e-06, | |
| "loss": 0.0, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.15161537512159e-06, | |
| "loss": 0.0, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.098095212445831e-06, | |
| "loss": 0.0, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.044833218204998e-06, | |
| "loss": 0.0, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.991831722251268e-06, | |
| "loss": 0.0, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.939093043041769e-06, | |
| "loss": 0.0, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.886619487537188e-06, | |
| "loss": 0.0, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.834413351100823e-06, | |
| "loss": 0.0, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.7824769173982133e-06, | |
| "loss": 0.0, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.7308124582972218e-06, | |
| "loss": 0.0, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.6794222337686514e-06, | |
| "loss": 0.0, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.628308491787411e-06, | |
| "loss": 0.0, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5774734682341563e-06, | |
| "loss": 0.0, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.526919386797504e-06, | |
| "loss": 0.0, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.4766484588767436e-06, | |
| "loss": 0.0, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.426662883485119e-06, | |
| "loss": 0.0, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.376964847153634e-06, | |
| "loss": 0.0, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.32755652383539e-06, | |
| "loss": 0.0, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.2784400748105162e-06, | |
| "loss": 0.0, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.2296176485916043e-06, | |
| "loss": 0.0, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 1.206122979269253e-09, | |
| "learning_rate": 3.1810913808297374e-06, | |
| "loss": -0.0393, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 6.012237818708854e-09, | |
| "learning_rate": 3.132863394221076e-06, | |
| "loss": 0.0, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 3.7800000000000002, | |
| "grad_norm": 3.776818413570027e-09, | |
| "learning_rate": 3.0849357984139826e-06, | |
| "loss": 0.0, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 5.78899381764586e-08, | |
| "learning_rate": 3.0373106899167758e-06, | |
| "loss": 0.0, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 4.312326495892194e-09, | |
| "learning_rate": 2.989990152005976e-06, | |
| "loss": 0.0, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 5.338903763529856e-10, | |
| "learning_rate": 2.942976254635207e-06, | |
| "loss": 0.0, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.336624604400538e-12, | |
| "learning_rate": 2.8962710543446504e-06, | |
| "loss": 0.0, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 1.2579886021768516e-09, | |
| "learning_rate": 2.849876594171064e-06, | |
| "loss": 0.0, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.803794903558439e-06, | |
| "loss": 0.0, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.7580279982692017e-06, | |
| "loss": 0.0, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.7125778802960557e-06, | |
| "loss": 0.0, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.667446537774402e-06, | |
| "loss": 0.0, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.622635944895362e-06, | |
| "loss": 0.0, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.578148061819441e-06, | |
| "loss": 0.0, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.533984834590758e-06, | |
| "loss": 0.0, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.490148195051949e-06, | |
| "loss": 0.0, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.446640060759632e-06, | |
| "loss": 0.0, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4034623349005492e-06, | |
| "loss": 0.0, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 2.392158648945042e-08, | |
| "learning_rate": 2.360616906208311e-06, | |
| "loss": -0.0357, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 4.052552737832116e-10, | |
| "learning_rate": 2.3181056488807606e-06, | |
| "loss": 0.0, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 1.4914207369098875e-10, | |
| "learning_rate": 2.2759304224980174e-06, | |
| "loss": 0.0, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 3.9699999999999998, | |
| "grad_norm": 1.1213100670204312e-08, | |
| "learning_rate": 2.234093071941108e-06, | |
| "loss": 0.0, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 6.406816483589539e-12, | |
| "learning_rate": 2.1925954273112838e-06, | |
| "loss": 0.0, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 8.113453485447675e-13, | |
| "learning_rate": 2.151439303849949e-06, | |
| "loss": 0.0, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.0131413936964595e-12, | |
| "learning_rate": 2.110626501859275e-06, | |
| "loss": 0.0, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 6.270884506109908e-11, | |
| "learning_rate": 2.070158806623438e-06, | |
| "loss": 0.0, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.03003798833052e-06, | |
| "loss": 0.0, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9902658019950915e-06, | |
| "loss": 0.0, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.950843987381421e-06, | |
| "loss": 0.0, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9117742689273942e-06, | |
| "loss": 0.0, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8730583556690607e-06, | |
| "loss": 0.0, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8346979411658893e-06, | |
| "loss": 0.0, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7966947034266857e-06, | |
| "loss": 0.0, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.759050304836174e-06, | |
| "loss": 0.0, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7217663920823069e-06, | |
| "loss": 0.0, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6848445960842075e-06, | |
| "loss": 0.0, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6482865319208408e-06, | |
| "loss": 0.0, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.612093798760368e-06, | |
| "loss": 0.0, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5762679797901848e-06, | |
| "loss": 0.0, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5408106421476753e-06, | |
| "loss": 0.0, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5057233368516522e-06, | |
| "loss": 0.0, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4710075987345129e-06, | |
| "loss": 0.0, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4366649463751103e-06, | |
| "loss": 0.0, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4026968820323105e-06, | |
| "loss": 0.0, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3691048915792892e-06, | |
| "loss": 0.0, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3358904444385368e-06, | |
| "loss": 0.0, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.303054993517564e-06, | |
| "loss": 0.0, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2705999751453712e-06, | |
| "loss": 0.0, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2385268090095992e-06, | |
| "loss": 0.0, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.206836898094439e-06, | |
| "loss": 0.0, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.175531628619253e-06, | |
| "loss": 0.0, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1446123699779433e-06, | |
| "loss": 0.0, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1140804746790512e-06, | |
| "loss": 0.0, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.083937278286582e-06, | |
| "loss": 0.0, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0541840993616003e-06, | |
| "loss": 0.0, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.024822239404535e-06, | |
| "loss": 0.0, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.958529827982644e-07, | |
| "loss": 0.0, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.672775967519144e-07, | |
| "loss": 0.0, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.390973312454465e-07, | |
| "loss": 0.0, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.11313418974965e-07, | |
| "loss": 0.0, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.839270752987972e-07, | |
| "loss": 0.0, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.569394981843382e-07, | |
| "loss": 0.0, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.303518681556355e-07, | |
| "loss": 0.0, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.041653482417622e-07, | |
| "loss": 0.0, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.783810839259287e-07, | |
| "loss": 0.0, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.530002030953854e-07, | |
| "loss": 0.0, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.28023815992086e-07, | |
| "loss": 0.0, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.034530151641117e-07, | |
| "loss": 0.0, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.792888754178906e-07, | |
| "loss": 0.0, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.555324537711749e-07, | |
| "loss": 0.0, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.321847894068089e-07, | |
| "loss": 0.0, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.092469036272664e-07, | |
| "loss": 0.0, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.867197998099783e-07, | |
| "loss": 0.0, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.646044633634484e-07, | |
| "loss": 0.0, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.42901861684132e-07, | |
| "loss": 0.0, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.216129441141371e-07, | |
| "loss": 0.0, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.007386418996818e-07, | |
| "loss": 0.0, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.802798681503673e-07, | |
| "loss": 0.0, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.60237517799238e-07, | |
| "loss": 0.0, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.4061246756362364e-07, | |
| "loss": 0.0, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.2140557590680034e-07, | |
| "loss": 0.0, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.02617683000428e-07, | |
| "loss": 0.0, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.842496106878102e-07, | |
| "loss": 0.0, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.6630216244793236e-07, | |
| "loss": 0.0, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.487761233603204e-07, | |
| "loss": 0.0, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3167226007070454e-07, | |
| "loss": 0.0, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.149913207574695e-07, | |
| "loss": 0.0, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.987340350989421e-07, | |
| "loss": 0.0, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.829011142414628e-07, | |
| "loss": 0.0, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.674932507682815e-07, | |
| "loss": 0.0, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.5251111866926303e-07, | |
| "loss": 0.0, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.3795537331139596e-07, | |
| "loss": 0.0, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.238266514101417e-07, | |
| "loss": 0.0, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 4.6899999999999995, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.101255710015626e-07, | |
| "loss": 0.0, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9685273141530348e-07, | |
| "loss": 0.0, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.84008713248367e-07, | |
| "loss": 0.0, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7159407833971298e-07, | |
| "loss": 0.0, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5960936974569353e-07, | |
| "loss": 0.0, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4805511171628538e-07, | |
| "loss": 0.0, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3693180967216614e-07, | |
| "loss": 0.0, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2623995018260283e-07, | |
| "loss": 0.0, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1598000094416362e-07, | |
| "loss": 0.0, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.061524107602696e-07, | |
| "loss": 0.0, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.675760952154833e-08, | |
| "loss": 0.0, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.779600818704437e-08, | |
| "loss": 0.0, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 4.8100000000000005, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.926799876622926e-08, | |
| "loss": 0.0, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.117395430186414e-08, | |
| "loss": 0.0, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.351422885367276e-08, | |
| "loss": 0.0, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.628915748286057e-08, | |
| "loss": 0.0, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9499056237457544e-08, | |
| "loss": 0.0, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.31442221384859e-08, | |
| "loss": 0.0, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.7224933166979396e-08, | |
| "loss": 0.0, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.174144825180969e-08, | |
| "loss": 0.0, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6694007258374344e-08, | |
| "loss": 0.0, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.208283097809183e-08, | |
| "loss": 0.0, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.790812111875151e-08, | |
| "loss": 0.0, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4170060295687348e-08, | |
| "loss": 0.0, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0868812023787646e-08, | |
| "loss": 0.0, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 4.9399999999999995, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.004520710347408e-09, | |
| "loss": 0.0, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.577311648748973e-09, | |
| "loss": 0.0, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.587291012977501e-09, | |
| "loss": 0.0, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0345458529835715e-09, | |
| "loss": 0.0, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.191440908706828e-10, | |
| "loss": 0.0, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.41134517925401e-10, | |
| "loss": 0.0, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.467924824031912e-13, | |
| "loss": 0.0, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 10000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.42298541948928e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |