| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.5280855941918228, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07642338555598013, | |
| "grad_norm": 0.04852772876620293, | |
| "learning_rate": 0.00019991780772074993, | |
| "loss": 0.631, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15284677111196027, | |
| "grad_norm": 0.06019178777933121, | |
| "learning_rate": 0.00019926107993313918, | |
| "loss": 0.4967, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2292701566679404, | |
| "grad_norm": 0.06423385441303253, | |
| "learning_rate": 0.00019795194081958614, | |
| "loss": 0.4377, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.30569354222392053, | |
| "grad_norm": 0.05076967179775238, | |
| "learning_rate": 0.00019599899493270587, | |
| "loss": 0.4121, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.38211692777990064, | |
| "grad_norm": 0.06871291249990463, | |
| "learning_rate": 0.00019341507836108832, | |
| "loss": 0.3974, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4585403133358808, | |
| "grad_norm": 0.062317609786987305, | |
| "learning_rate": 0.00019021717436179406, | |
| "loss": 0.3826, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5349636988918609, | |
| "grad_norm": 0.054332610219717026, | |
| "learning_rate": 0.00018642630173483835, | |
| "loss": 0.3695, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6113870844478411, | |
| "grad_norm": 0.07528349757194519, | |
| "learning_rate": 0.00018206737667334231, | |
| "loss": 0.3589, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6878104700038211, | |
| "grad_norm": 0.07792173326015472, | |
| "learning_rate": 0.00017716904899736617, | |
| "loss": 0.3527, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7642338555598013, | |
| "grad_norm": 0.08550075441598892, | |
| "learning_rate": 0.00017176351384780628, | |
| "loss": 0.3407, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8406572411157814, | |
| "grad_norm": 0.09895286709070206, | |
| "learning_rate": 0.00016588630007803324, | |
| "loss": 0.3292, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9170806266717616, | |
| "grad_norm": 0.05582467094063759, | |
| "learning_rate": 0.00015957603673410394, | |
| "loss": 0.321, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9935040122277417, | |
| "grad_norm": 0.05442088469862938, | |
| "learning_rate": 0.00015287419915839849, | |
| "loss": 0.3202, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.0695452808559418, | |
| "grad_norm": 0.07129843533039093, | |
| "learning_rate": 0.00014582483638546267, | |
| "loss": 0.3075, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.145968666411922, | |
| "grad_norm": 0.06552577763795853, | |
| "learning_rate": 0.00013847428162179422, | |
| "loss": 0.3008, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2223920519679021, | |
| "grad_norm": 0.07303871214389801, | |
| "learning_rate": 0.00013087084771249831, | |
| "loss": 0.2948, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2988154375238823, | |
| "grad_norm": 0.12717650830745697, | |
| "learning_rate": 0.00012306450959641117, | |
| "loss": 0.2948, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3752388230798624, | |
| "grad_norm": 0.05392363294959068, | |
| "learning_rate": 0.00011510657583681364, | |
| "loss": 0.2887, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.4516622086358426, | |
| "grad_norm": 0.07771366834640503, | |
| "learning_rate": 0.00010704935138665758, | |
| "loss": 0.286, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.5280855941918228, | |
| "grad_norm": 0.056173793971538544, | |
| "learning_rate": 9.894579380484204e-05, | |
| "loss": 0.2874, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3924, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.400738054418727e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |