| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.995854444956241, | |
| "eval_steps": 500, | |
| "global_step": 1626, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09212344541685859, | |
| "grad_norm": 0.14366954565048218, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8334, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.18424689083371718, | |
| "grad_norm": 0.14357222616672516, | |
| "learning_rate": 0.00019950370742676928, | |
| "loss": 0.5605, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2763703362505758, | |
| "grad_norm": 0.16773365437984467, | |
| "learning_rate": 0.000198019755833442, | |
| "loss": 0.5345, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.36849378166743435, | |
| "grad_norm": 0.1706201732158661, | |
| "learning_rate": 0.00019556287470311418, | |
| "loss": 0.5189, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.460617227084293, | |
| "grad_norm": 0.13769522309303284, | |
| "learning_rate": 0.00019215745067295169, | |
| "loss": 0.5048, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5527406725011516, | |
| "grad_norm": 0.12494733184576035, | |
| "learning_rate": 0.0001878372854760519, | |
| "loss": 0.4974, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6448641179180101, | |
| "grad_norm": 0.13252969086170197, | |
| "learning_rate": 0.00018264526043046192, | |
| "loss": 0.488, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7369875633348687, | |
| "grad_norm": 0.13999420404434204, | |
| "learning_rate": 0.00017663291080558476, | |
| "loss": 0.4843, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8291110087517273, | |
| "grad_norm": 0.1319541335105896, | |
| "learning_rate": 0.00016985991429075036, | |
| "loss": 0.4817, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.921234454168586, | |
| "grad_norm": 0.12652404606342316, | |
| "learning_rate": 0.00016239349864333527, | |
| "loss": 0.475, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0133578995854444, | |
| "grad_norm": 0.1295047402381897, | |
| "learning_rate": 0.00015430777439602876, | |
| "loss": 0.4714, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.105481345002303, | |
| "grad_norm": 0.13263845443725586, | |
| "learning_rate": 0.0001456829992466935, | |
| "loss": 0.4582, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1976047904191618, | |
| "grad_norm": 0.1430622935295105, | |
| "learning_rate": 0.00013660478143237746, | |
| "loss": 0.4536, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.2897282358360203, | |
| "grad_norm": 0.12906011939048767, | |
| "learning_rate": 0.000127163229994669, | |
| "loss": 0.4477, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.381851681252879, | |
| "grad_norm": 0.13030315935611725, | |
| "learning_rate": 0.00011745206037073426, | |
| "loss": 0.4493, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.4739751266697374, | |
| "grad_norm": 0.13369910418987274, | |
| "learning_rate": 0.0001075676641878081, | |
| "loss": 0.4454, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.566098572086596, | |
| "grad_norm": 0.1270260065793991, | |
| "learning_rate": 9.760815249421973e-05, | |
| "loss": 0.4451, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.6582220175034545, | |
| "grad_norm": 0.141569122672081, | |
| "learning_rate": 8.76723819236997e-05, | |
| "loss": 0.442, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.7503454629203132, | |
| "grad_norm": 0.13551534712314606, | |
| "learning_rate": 7.785897345911754e-05, | |
| "loss": 0.4418, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.842468908337172, | |
| "grad_norm": 0.12830372154712677, | |
| "learning_rate": 6.826533353525428e-05, | |
| "loss": 0.4395, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.9345923537540304, | |
| "grad_norm": 0.13482163846492767, | |
| "learning_rate": 5.8986687196999135e-05, | |
| "loss": 0.4357, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.026715799170889, | |
| "grad_norm": 0.13557973504066467, | |
| "learning_rate": 5.0115132909698394e-05, | |
| "loss": 0.4322, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.1188392445877477, | |
| "grad_norm": 0.13515286147594452, | |
| "learning_rate": 4.1738728403467974e-05, | |
| "loss": 0.4175, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.210962690004606, | |
| "grad_norm": 0.14744685590267181, | |
| "learning_rate": 3.394061662524432e-05, | |
| "loss": 0.4199, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.3030861354214647, | |
| "grad_norm": 0.14161629974842072, | |
| "learning_rate": 2.679820047424253e-05, | |
| "loss": 0.4197, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.3952095808383236, | |
| "grad_norm": 0.1435745358467102, | |
| "learning_rate": 2.0382374512275893e-05, | |
| "loss": 0.4202, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.487333026255182, | |
| "grad_norm": 0.13836345076560974, | |
| "learning_rate": 1.4756821274865695e-05, | |
| "loss": 0.4218, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.5794564716720405, | |
| "grad_norm": 0.1427369862794876, | |
| "learning_rate": 9.977379167852763e-06, | |
| "loss": 0.4208, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.671579917088899, | |
| "grad_norm": 0.14284783601760864, | |
| "learning_rate": 6.091488223675057e-06, | |
| "loss": 0.4183, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.763703362505758, | |
| "grad_norm": 0.14546233415603638, | |
| "learning_rate": 3.1377192186521354e-06, | |
| "loss": 0.4169, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.8558268079226163, | |
| "grad_norm": 0.13724300265312195, | |
| "learning_rate": 1.1453908251886636e-06, | |
| "loss": 0.4203, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.947950253339475, | |
| "grad_norm": 0.141755610704422, | |
| "learning_rate": 1.342785989868922e-07, | |
| "loss": 0.4193, | |
| "step": 1600 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1626, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5990769356845875e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |