| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999954795108831, | |
| "eval_steps": 1000, | |
| "global_step": 110607, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00024521369940743435, | |
| "loss": 5.4472, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0002927618508292659, | |
| "loss": 3.467, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0002997989257927486, | |
| "loss": 3.3454, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00029952720389105764, | |
| "loss": 3.2872, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00029925548198936656, | |
| "loss": 3.2489, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002989837600876756, | |
| "loss": 3.2458, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029871475540500146, | |
| "loss": 3.2073, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029844303350331044, | |
| "loss": 3.1957, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002981713116016194, | |
| "loss": 3.1817, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029789958969992844, | |
| "loss": 3.1631, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.4182117332510669, | |
| "eval_loss": 3.180420398712158, | |
| "eval_runtime": 43.6723, | |
| "eval_samples_per_second": 148.447, | |
| "eval_steps_per_second": 2.496, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002976278677982374, | |
| "loss": 3.1598, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002973561458965464, | |
| "loss": 3.1584, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029708442399485537, | |
| "loss": 3.144, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029681270209316434, | |
| "loss": 3.1346, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002965409801914733, | |
| "loss": 3.1359, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029626925828978235, | |
| "loss": 3.1268, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002959975363880913, | |
| "loss": 3.1175, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002957258144864003, | |
| "loss": 3.1189, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002954540925847093, | |
| "loss": 3.1057, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029518237068301825, | |
| "loss": 3.1124, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.4272265623818554, | |
| "eval_loss": 3.106520891189575, | |
| "eval_runtime": 43.4484, | |
| "eval_samples_per_second": 149.212, | |
| "eval_steps_per_second": 2.509, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002949106487813272, | |
| "loss": 3.1004, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002946389268796362, | |
| "loss": 3.1018, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029436720497794523, | |
| "loss": 3.0864, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002940954830762542, | |
| "loss": 3.0872, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002938237611745632, | |
| "loss": 3.0883, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029355475649188906, | |
| "loss": 3.0843, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002932830345901981, | |
| "loss": 3.0815, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029301131268850706, | |
| "loss": 3.0784, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029273959078681604, | |
| "loss": 3.0728, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000292467868885125, | |
| "loss": 3.0757, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.42875514543315396, | |
| "eval_loss": 3.0894298553466797, | |
| "eval_runtime": 43.8742, | |
| "eval_samples_per_second": 147.763, | |
| "eval_steps_per_second": 2.484, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000292196146983434, | |
| "loss": 3.0754, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000291924425081743, | |
| "loss": 3.0634, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029165270318005194, | |
| "loss": 3.0652, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029138098127836097, | |
| "loss": 3.0566, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029110925937666994, | |
| "loss": 3.067, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002908375374749789, | |
| "loss": 3.0525, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002905658155732879, | |
| "loss": 3.0595, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029029409367159687, | |
| "loss": 3.0586, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002900223717699059, | |
| "loss": 3.0499, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002897506498682148, | |
| "loss": 3.0488, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.43185525945686004, | |
| "eval_loss": 3.062988758087158, | |
| "eval_runtime": 44.0507, | |
| "eval_samples_per_second": 147.171, | |
| "eval_steps_per_second": 2.474, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028947892796652385, | |
| "loss": 3.0426, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002892072060648328, | |
| "loss": 3.0433, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002889354841631418, | |
| "loss": 3.0428, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028866376226145083, | |
| "loss": 3.0359, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028839204035975975, | |
| "loss": 3.0386, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002881203184580688, | |
| "loss": 3.034, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028784859655637776, | |
| "loss": 3.0456, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028757687465468673, | |
| "loss": 3.0361, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002873051527529957, | |
| "loss": 3.0412, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002870334308513047, | |
| "loss": 3.0403, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.43361975362410893, | |
| "eval_loss": 3.0423271656036377, | |
| "eval_runtime": 43.1088, | |
| "eval_samples_per_second": 150.387, | |
| "eval_steps_per_second": 2.528, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002867617089496137, | |
| "loss": 3.0341, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028648998704792263, | |
| "loss": 3.0297, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028621826514623166, | |
| "loss": 3.0324, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028594654324454064, | |
| "loss": 3.0317, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002856748213428496, | |
| "loss": 3.0167, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002854030994411586, | |
| "loss": 3.0202, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028513137753946756, | |
| "loss": 3.0231, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002848596556377766, | |
| "loss": 3.0166, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002845879337360855, | |
| "loss": 3.0246, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028431621183439454, | |
| "loss": 3.0172, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.434315993866311, | |
| "eval_loss": 3.038356304168701, | |
| "eval_runtime": 43.5763, | |
| "eval_samples_per_second": 148.774, | |
| "eval_steps_per_second": 2.501, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002840444899327035, | |
| "loss": 3.0123, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002837727680310125, | |
| "loss": 3.0177, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002835010461293215, | |
| "loss": 3.0195, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002832320414466474, | |
| "loss": 3.0175, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002829603195449564, | |
| "loss": 3.0192, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028268859764326535, | |
| "loss": 3.0079, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002824168757415743, | |
| "loss": 3.0138, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002821451538398833, | |
| "loss": 3.0175, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028187343193819233, | |
| "loss": 3.0148, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028160171003650125, | |
| "loss": 3.0102, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.43602120780442366, | |
| "eval_loss": 3.026742696762085, | |
| "eval_runtime": 43.2189, | |
| "eval_samples_per_second": 150.004, | |
| "eval_steps_per_second": 2.522, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002813299881348103, | |
| "loss": 3.0111, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028105826623311925, | |
| "loss": 3.0077, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028078654433142823, | |
| "loss": 3.0055, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002805148224297372, | |
| "loss": 3.0084, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002802431005280462, | |
| "loss": 3.0124, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002799713786263552, | |
| "loss": 3.0051, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027969965672466413, | |
| "loss": 3.0039, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027942793482297316, | |
| "loss": 3.0033, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027915621292128214, | |
| "loss": 3.0044, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002788844910195911, | |
| "loss": 2.9888, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4361119428490199, | |
| "eval_loss": 3.0189716815948486, | |
| "eval_runtime": 43.5746, | |
| "eval_samples_per_second": 148.779, | |
| "eval_steps_per_second": 2.501, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027861276911790014, | |
| "loss": 3.0097, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027834104721620906, | |
| "loss": 3.008, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002780693253145181, | |
| "loss": 2.9979, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027779760341282707, | |
| "loss": 2.994, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027752588151113604, | |
| "loss": 2.9985, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000277254159609445, | |
| "loss": 2.9966, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000276982437707754, | |
| "loss": 2.9968, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000276710715806063, | |
| "loss": 2.9999, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000276438993904372, | |
| "loss": 2.9973, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027616727200268097, | |
| "loss": 3.0024, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.4384970647213073, | |
| "eval_loss": 3.0039989948272705, | |
| "eval_runtime": 44.0911, | |
| "eval_samples_per_second": 147.036, | |
| "eval_steps_per_second": 2.472, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027589555010098995, | |
| "loss": 3.0015, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002756238281992989, | |
| "loss": 2.9946, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002753521062976079, | |
| "loss": 2.9932, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002750803843959169, | |
| "loss": 2.9985, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002748086624942259, | |
| "loss": 2.9913, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002745369405925349, | |
| "loss": 2.9946, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027426521869084385, | |
| "loss": 2.99, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027399349678915283, | |
| "loss": 2.9927, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002737217748874618, | |
| "loss": 2.9883, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027345005298577083, | |
| "loss": 2.9948, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.43782985969337607, | |
| "eval_loss": 3.0057804584503174, | |
| "eval_runtime": 43.6419, | |
| "eval_samples_per_second": 148.55, | |
| "eval_steps_per_second": 2.498, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002731810483030967, | |
| "loss": 2.983, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002729093264014057, | |
| "loss": 2.9806, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027263760449971466, | |
| "loss": 2.9881, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027236588259802364, | |
| "loss": 2.9814, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027209416069633267, | |
| "loss": 2.9824, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027182243879464164, | |
| "loss": 2.9885, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002715507168929506, | |
| "loss": 2.989, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002712789949912596, | |
| "loss": 2.986, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027100727308956857, | |
| "loss": 2.9856, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027073555118787754, | |
| "loss": 2.9774, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.438878756808909, | |
| "eval_loss": 2.9962034225463867, | |
| "eval_runtime": 43.3441, | |
| "eval_samples_per_second": 149.57, | |
| "eval_steps_per_second": 2.515, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002704638292861865, | |
| "loss": 2.9941, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027019210738449555, | |
| "loss": 2.9799, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002699203854828045, | |
| "loss": 2.9834, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002696513808001304, | |
| "loss": 2.9767, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002693796588984394, | |
| "loss": 2.9772, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002691079369967484, | |
| "loss": 2.9891, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002688362150950573, | |
| "loss": 2.9787, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026856449319336635, | |
| "loss": 2.987, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026829277129167533, | |
| "loss": 2.979, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002680210493899843, | |
| "loss": 2.9818, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.4390166740766953, | |
| "eval_loss": 2.9964208602905273, | |
| "eval_runtime": 44.0294, | |
| "eval_samples_per_second": 147.242, | |
| "eval_steps_per_second": 2.476, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002677493274882933, | |
| "loss": 2.9798, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026747760558660225, | |
| "loss": 2.9833, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002672058836849113, | |
| "loss": 2.9787, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026693416178322026, | |
| "loss": 2.9807, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026666515710054614, | |
| "loss": 2.9846, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002663934351988551, | |
| "loss": 2.9758, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026612171329716414, | |
| "loss": 2.9749, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026584999139547306, | |
| "loss": 2.9688, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002655782694937821, | |
| "loss": 2.9886, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026530654759209107, | |
| "loss": 2.9771, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.4395580598427864, | |
| "eval_loss": 2.991270065307617, | |
| "eval_runtime": 43.0298, | |
| "eval_samples_per_second": 150.663, | |
| "eval_steps_per_second": 2.533, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026503482569040004, | |
| "loss": 2.9802, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000264763103788709, | |
| "loss": 2.9711, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000264491381887018, | |
| "loss": 2.9845, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000264219659985327, | |
| "loss": 2.9735, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026394793808363594, | |
| "loss": 2.9731, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026367621618194497, | |
| "loss": 2.9717, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026340449428025395, | |
| "loss": 2.9718, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002631327723785629, | |
| "loss": 2.9766, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002628637676958888, | |
| "loss": 2.9812, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026259204579419783, | |
| "loss": 2.9786, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.43911829732664315, | |
| "eval_loss": 2.9915201663970947, | |
| "eval_runtime": 43.7467, | |
| "eval_samples_per_second": 148.194, | |
| "eval_steps_per_second": 2.492, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002623203238925068, | |
| "loss": 2.9757, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002620486019908158, | |
| "loss": 2.9781, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026177959730814166, | |
| "loss": 2.9733, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002615078754064507, | |
| "loss": 2.9773, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002612361535047596, | |
| "loss": 2.9755, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026096443160306864, | |
| "loss": 2.9837, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002606927097013776, | |
| "loss": 2.9786, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002604209877996866, | |
| "loss": 2.9709, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026014926589799556, | |
| "loss": 2.9797, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025987754399630454, | |
| "loss": 2.9866, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4393814289559723, | |
| "eval_loss": 2.9924139976501465, | |
| "eval_runtime": 43.2705, | |
| "eval_samples_per_second": 149.825, | |
| "eval_steps_per_second": 2.519, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025960582209461357, | |
| "loss": 2.976, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002593341001929225, | |
| "loss": 2.9674, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002590623782912315, | |
| "loss": 2.98, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002587906563895405, | |
| "loss": 2.9805, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025851893448784947, | |
| "loss": 2.9738, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025824721258615844, | |
| "loss": 2.9702, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002579754906844674, | |
| "loss": 2.9678, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025770376878277645, | |
| "loss": 2.9699, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002574320468810854, | |
| "loss": 2.9717, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002571603249793944, | |
| "loss": 2.9751, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.43892109982972055, | |
| "eval_loss": 2.9917728900909424, | |
| "eval_runtime": 44.2385, | |
| "eval_samples_per_second": 146.547, | |
| "eval_steps_per_second": 2.464, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002568886030777034, | |
| "loss": 2.9653, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025661688117601235, | |
| "loss": 2.9817, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002563451592743214, | |
| "loss": 2.9652, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025607343737263035, | |
| "loss": 2.9704, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025580171547093933, | |
| "loss": 2.9727, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002555299935692483, | |
| "loss": 2.9743, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002552582716675573, | |
| "loss": 2.9719, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025498654976586626, | |
| "loss": 2.9615, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025471482786417523, | |
| "loss": 2.973, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025444310596248426, | |
| "loss": 2.9702, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.4393215438265388, | |
| "eval_loss": 2.992605447769165, | |
| "eval_runtime": 45.7096, | |
| "eval_samples_per_second": 141.83, | |
| "eval_steps_per_second": 2.385, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025417410127981014, | |
| "loss": 2.9689, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002539023793781191, | |
| "loss": 2.9727, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002536306574764281, | |
| "loss": 2.9669, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025335893557473706, | |
| "loss": 2.9717, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002530872136730461, | |
| "loss": 2.9646, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025281549177135507, | |
| "loss": 2.9757, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025254376986966404, | |
| "loss": 2.9679, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000252272047967973, | |
| "loss": 2.9691, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000252000326066282, | |
| "loss": 2.9718, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025172860416459097, | |
| "loss": 2.9695, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.44013513472641874, | |
| "eval_loss": 2.981644868850708, | |
| "eval_runtime": 43.6409, | |
| "eval_samples_per_second": 148.553, | |
| "eval_steps_per_second": 2.498, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002514568822629, | |
| "loss": 2.9666, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002511878775802259, | |
| "loss": 2.9696, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025091615567853485, | |
| "loss": 2.9687, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002506444337768438, | |
| "loss": 2.9674, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002503727118751528, | |
| "loss": 2.9655, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025010098997346183, | |
| "loss": 2.9661, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002498292680717708, | |
| "loss": 2.9673, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002495575461700798, | |
| "loss": 2.9641, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024928582426838876, | |
| "loss": 2.9598, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024901410236669773, | |
| "loss": 2.9615, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.44022042566833924, | |
| "eval_loss": 2.982591390609741, | |
| "eval_runtime": 43.6998, | |
| "eval_samples_per_second": 148.353, | |
| "eval_steps_per_second": 2.494, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024874238046500676, | |
| "loss": 2.958, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002484706585633157, | |
| "loss": 2.9688, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002481989366616247, | |
| "loss": 2.9603, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002479272147599337, | |
| "loss": 2.9625, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024765549285824266, | |
| "loss": 2.9611, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024738377095655164, | |
| "loss": 2.9594, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002471120490548606, | |
| "loss": 2.9648, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024684032715316964, | |
| "loss": 2.961, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002465686052514786, | |
| "loss": 2.9589, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002462968833497876, | |
| "loss": 2.9609, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.4406414362752659, | |
| "eval_loss": 2.9791083335876465, | |
| "eval_runtime": 44.2323, | |
| "eval_samples_per_second": 146.567, | |
| "eval_steps_per_second": 2.464, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024602516144809657, | |
| "loss": 2.962, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024575343954640554, | |
| "loss": 2.9566, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024548171764471457, | |
| "loss": 2.964, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002452099957430235, | |
| "loss": 2.9573, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002449409910603494, | |
| "loss": 2.9621, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002446692691586584, | |
| "loss": 2.9568, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002443975472569674, | |
| "loss": 2.9643, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024412582535527635, | |
| "loss": 2.9614, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024385410345358535, | |
| "loss": 2.9546, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024358238155189433, | |
| "loss": 2.9607, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.44158508073906716, | |
| "eval_loss": 2.9684245586395264, | |
| "eval_runtime": 43.092, | |
| "eval_samples_per_second": 150.446, | |
| "eval_steps_per_second": 2.529, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024331065965020333, | |
| "loss": 2.9608, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024303893774851228, | |
| "loss": 2.9556, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024276721584682128, | |
| "loss": 2.9579, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024249549394513028, | |
| "loss": 2.9585, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024222377204343926, | |
| "loss": 2.9544, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024195205014174823, | |
| "loss": 2.9614, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002416803282400572, | |
| "loss": 2.9536, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024141132355738314, | |
| "loss": 2.9556, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002411396016556921, | |
| "loss": 2.9559, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002408678797540011, | |
| "loss": 2.9533, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.4422038937432138, | |
| "eval_loss": 2.967719554901123, | |
| "eval_runtime": 42.9224, | |
| "eval_samples_per_second": 151.04, | |
| "eval_steps_per_second": 2.539, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024059615785231007, | |
| "loss": 2.9493, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024032443595061907, | |
| "loss": 2.9543, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024005271404892804, | |
| "loss": 2.9565, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023978099214723702, | |
| "loss": 2.9501, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023950927024554602, | |
| "loss": 2.9395, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023923754834385497, | |
| "loss": 2.9598, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023896854366118087, | |
| "loss": 2.9492, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023869682175948988, | |
| "loss": 2.947, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023842509985779885, | |
| "loss": 2.9573, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023815337795610783, | |
| "loss": 2.9513, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4420853332849413, | |
| "eval_loss": 2.9676427841186523, | |
| "eval_runtime": 43.0836, | |
| "eval_samples_per_second": 150.475, | |
| "eval_steps_per_second": 2.53, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023788165605441683, | |
| "loss": 2.9472, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023760993415272578, | |
| "loss": 2.9513, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023733821225103478, | |
| "loss": 2.9542, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023706649034934378, | |
| "loss": 2.9497, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023679476844765276, | |
| "loss": 2.9565, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023652576376497863, | |
| "loss": 2.9518, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023625404186328764, | |
| "loss": 2.9471, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023598231996159664, | |
| "loss": 2.956, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023571059805990559, | |
| "loss": 2.953, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002354388761582146, | |
| "loss": 2.9563, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.4428523468619285, | |
| "eval_loss": 2.9609880447387695, | |
| "eval_runtime": 42.9764, | |
| "eval_samples_per_second": 150.85, | |
| "eval_steps_per_second": 2.536, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023516715425652356, | |
| "loss": 2.9458, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023489543235483257, | |
| "loss": 2.9539, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023462371045314151, | |
| "loss": 2.9549, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023435198855145052, | |
| "loss": 2.9496, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023408026664975952, | |
| "loss": 2.9514, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002338085447480685, | |
| "loss": 2.9471, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023353682284637747, | |
| "loss": 2.9448, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023326510094468644, | |
| "loss": 2.948, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023299337904299545, | |
| "loss": 2.9454, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023272165714130445, | |
| "loss": 2.9466, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.44241621374756906, | |
| "eval_loss": 2.9626522064208984, | |
| "eval_runtime": 43.5013, | |
| "eval_samples_per_second": 149.03, | |
| "eval_steps_per_second": 2.506, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002324499352396134, | |
| "loss": 2.9417, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002321782133379224, | |
| "loss": 2.9452, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023190649143623138, | |
| "loss": 2.9406, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023163476953454038, | |
| "loss": 2.945, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023136304763284933, | |
| "loss": 2.9419, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023109132573115833, | |
| "loss": 2.9452, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023081960382946733, | |
| "loss": 2.9435, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023054788192777628, | |
| "loss": 2.947, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023027616002608528, | |
| "loss": 2.9343, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00023000443812439426, | |
| "loss": 2.9431, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.442384758932109, | |
| "eval_loss": 2.9589717388153076, | |
| "eval_runtime": 43.1206, | |
| "eval_samples_per_second": 150.346, | |
| "eval_steps_per_second": 2.528, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022973271622270326, | |
| "loss": 2.9431, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022946099432101226, | |
| "loss": 2.9477, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002291892724193212, | |
| "loss": 2.939, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002289175505176302, | |
| "loss": 2.9385, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002286458286159392, | |
| "loss": 2.944, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022837410671424816, | |
| "loss": 2.9404, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022810238481255716, | |
| "loss": 2.9334, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022783066291086614, | |
| "loss": 2.9419, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022755894100917514, | |
| "loss": 2.9432, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022728993632650102, | |
| "loss": 2.9412, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.4435655243124552, | |
| "eval_loss": 2.952514410018921, | |
| "eval_runtime": 43.0804, | |
| "eval_samples_per_second": 150.486, | |
| "eval_steps_per_second": 2.53, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022701821442481, | |
| "loss": 2.9359, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.000226746492523119, | |
| "loss": 2.9426, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022647477062142797, | |
| "loss": 2.9307, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022620304871973695, | |
| "loss": 2.9353, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022593132681804595, | |
| "loss": 2.9353, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002256596049163549, | |
| "loss": 2.9403, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002253878830146639, | |
| "loss": 2.9393, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002251161611129729, | |
| "loss": 2.9313, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022484443921128188, | |
| "loss": 2.9348, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022457543452860775, | |
| "loss": 2.9299, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.4434947509776701, | |
| "eval_loss": 2.9504144191741943, | |
| "eval_runtime": 43.7459, | |
| "eval_samples_per_second": 148.197, | |
| "eval_steps_per_second": 2.492, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022430371262691676, | |
| "loss": 2.938, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022403199072522576, | |
| "loss": 2.9353, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022376298604255164, | |
| "loss": 2.9329, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002234912641408606, | |
| "loss": 2.9311, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002232195422391696, | |
| "loss": 2.9377, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022294782033747856, | |
| "loss": 2.9303, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022267609843578756, | |
| "loss": 2.9278, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022240437653409657, | |
| "loss": 2.9394, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022213265463240551, | |
| "loss": 2.9332, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022186093273071452, | |
| "loss": 2.9332, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.4434814431711293, | |
| "eval_loss": 2.9485716819763184, | |
| "eval_runtime": 41.8653, | |
| "eval_samples_per_second": 154.854, | |
| "eval_steps_per_second": 2.604, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002215892108290235, | |
| "loss": 2.9339, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002213174889273325, | |
| "loss": 2.9322, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002210457670256415, | |
| "loss": 2.9305, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022077404512395044, | |
| "loss": 2.9321, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022050232322225945, | |
| "loss": 2.9265, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022023331853958532, | |
| "loss": 2.9247, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021996159663789433, | |
| "loss": 2.9312, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002196898747362033, | |
| "loss": 2.9288, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002194181528345123, | |
| "loss": 2.9328, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021914643093282125, | |
| "loss": 2.9255, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.444235753841873, | |
| "eval_loss": 2.942479372024536, | |
| "eval_runtime": 41.7184, | |
| "eval_samples_per_second": 155.399, | |
| "eval_steps_per_second": 2.613, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021887470903113025, | |
| "loss": 2.9265, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021860298712943923, | |
| "loss": 2.9184, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021833126522774823, | |
| "loss": 2.9271, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002180622605450741, | |
| "loss": 2.9232, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002177905386433831, | |
| "loss": 2.9303, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021751881674169206, | |
| "loss": 2.9348, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021724709484000106, | |
| "loss": 2.9218, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021697537293831006, | |
| "loss": 2.9324, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021670365103661904, | |
| "loss": 2.9294, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021643192913492801, | |
| "loss": 2.9242, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.44344756875448005, | |
| "eval_loss": 2.945934534072876, | |
| "eval_runtime": 43.5276, | |
| "eval_samples_per_second": 148.94, | |
| "eval_steps_per_second": 2.504, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000216160207233237, | |
| "loss": 2.9231, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000215888485331546, | |
| "loss": 2.9269, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000215616763429855, | |
| "loss": 2.9247, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021534504152816394, | |
| "loss": 2.9236, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021507331962647295, | |
| "loss": 2.9296, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021480159772478192, | |
| "loss": 2.9267, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021452987582309092, | |
| "loss": 2.9259, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021425815392139987, | |
| "loss": 2.9259, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021398643201970887, | |
| "loss": 2.9236, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021371471011801788, | |
| "loss": 2.9242, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.4445194520813107, | |
| "eval_loss": 2.9377670288085938, | |
| "eval_runtime": 43.8729, | |
| "eval_samples_per_second": 147.768, | |
| "eval_steps_per_second": 2.484, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021344298821632685, | |
| "loss": 2.9178, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021317126631463583, | |
| "loss": 2.9257, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002128995444129448, | |
| "loss": 2.9227, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002126278225112538, | |
| "loss": 2.9228, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002123561006095628, | |
| "loss": 2.9183, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021208437870787175, | |
| "loss": 2.9196, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021181265680618076, | |
| "loss": 2.9143, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021154093490448973, | |
| "loss": 2.9192, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002112692130027987, | |
| "loss": 2.9187, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021099749110110768, | |
| "loss": 2.9267, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.4452544059425405, | |
| "eval_loss": 2.9316306114196777, | |
| "eval_runtime": 45.514, | |
| "eval_samples_per_second": 142.44, | |
| "eval_steps_per_second": 2.395, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021072576919941669, | |
| "loss": 2.9169, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002104540472977257, | |
| "loss": 2.9219, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021018232539603464, | |
| "loss": 2.9096, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020991060349434364, | |
| "loss": 2.9202, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002096388815926526, | |
| "loss": 2.9241, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020936987690997852, | |
| "loss": 2.9148, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002090981550082875, | |
| "loss": 2.9194, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002088264331065965, | |
| "loss": 2.9267, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020855471120490544, | |
| "loss": 2.9164, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020828298930321445, | |
| "loss": 2.9151, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.44544252993500344, | |
| "eval_loss": 2.931532382965088, | |
| "eval_runtime": 43.496, | |
| "eval_samples_per_second": 149.048, | |
| "eval_steps_per_second": 2.506, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020801126740152345, | |
| "loss": 2.9178, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020773954549983242, | |
| "loss": 2.9119, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020746782359814143, | |
| "loss": 2.9143, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020719610169645037, | |
| "loss": 2.9084, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020692437979475938, | |
| "loss": 2.9227, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020665265789306835, | |
| "loss": 2.9159, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020638365321039425, | |
| "loss": 2.9151, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020611193130870323, | |
| "loss": 2.9218, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020584020940701223, | |
| "loss": 2.9169, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020556848750532118, | |
| "loss": 2.9105, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.4455647197950598, | |
| "eval_loss": 2.928622245788574, | |
| "eval_runtime": 45.1155, | |
| "eval_samples_per_second": 143.698, | |
| "eval_steps_per_second": 2.416, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020529676560363018, | |
| "loss": 2.9135, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020502504370193919, | |
| "loss": 2.9099, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020475332180024816, | |
| "loss": 2.9114, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020448159989855714, | |
| "loss": 2.9169, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002042098779968661, | |
| "loss": 2.9098, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020393815609517511, | |
| "loss": 2.9126, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020366643419348412, | |
| "loss": 2.9095, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020339471229179306, | |
| "loss": 2.9086, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020312299039010207, | |
| "loss": 2.9077, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020285126848841104, | |
| "loss": 2.9053, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.4457353016789008, | |
| "eval_loss": 2.924194097518921, | |
| "eval_runtime": 41.9708, | |
| "eval_samples_per_second": 154.464, | |
| "eval_steps_per_second": 2.597, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020257954658672004, | |
| "loss": 2.9099, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000202307824685029, | |
| "loss": 2.9118, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000202036102783338, | |
| "loss": 2.91, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000201764380881647, | |
| "loss": 2.8983, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020149265897995595, | |
| "loss": 2.8964, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020122093707826495, | |
| "loss": 2.9024, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020095193239559085, | |
| "loss": 2.9057, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020068021049389983, | |
| "loss": 2.9094, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002004084885922088, | |
| "loss": 2.9071, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002001367666905178, | |
| "loss": 2.9023, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.44664325702516083, | |
| "eval_loss": 2.9194602966308594, | |
| "eval_runtime": 42.9573, | |
| "eval_samples_per_second": 150.917, | |
| "eval_steps_per_second": 2.537, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019986504478882678, | |
| "loss": 2.9047, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019959332288713575, | |
| "loss": 2.9097, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019932160098544476, | |
| "loss": 2.908, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019905259630277066, | |
| "loss": 2.9019, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001987808744010796, | |
| "loss": 2.9105, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001985091524993886, | |
| "loss": 2.9064, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001982401478167145, | |
| "loss": 2.9053, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001979684259150235, | |
| "loss": 2.906, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019769670401333247, | |
| "loss": 2.8997, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019742498211164147, | |
| "loss": 2.8946, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.4468059752051368, | |
| "eval_loss": 2.917731285095215, | |
| "eval_runtime": 43.2928, | |
| "eval_samples_per_second": 149.748, | |
| "eval_steps_per_second": 2.518, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019715326020995042, | |
| "loss": 2.9018, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019688153830825942, | |
| "loss": 2.8969, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019660981640656842, | |
| "loss": 2.9104, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001963380945048774, | |
| "loss": 2.9057, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019606637260318637, | |
| "loss": 2.9094, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019579465070149535, | |
| "loss": 2.9008, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019552292879980435, | |
| "loss": 2.8998, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019525120689811335, | |
| "loss": 2.9019, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001949794849964223, | |
| "loss": 2.8925, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001947077630947313, | |
| "loss": 2.9037, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.44703039321543825, | |
| "eval_loss": 2.9147427082061768, | |
| "eval_runtime": 43.7223, | |
| "eval_samples_per_second": 148.277, | |
| "eval_steps_per_second": 2.493, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019443604119304028, | |
| "loss": 2.9052, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019416431929134928, | |
| "loss": 2.9038, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019389259738965823, | |
| "loss": 2.9046, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019362087548796723, | |
| "loss": 2.903, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019334915358627623, | |
| "loss": 2.8919, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019307743168458518, | |
| "loss": 2.8936, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019280570978289418, | |
| "loss": 2.8985, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019253398788120316, | |
| "loss": 2.8955, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019226226597951216, | |
| "loss": 2.8943, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019199326129683804, | |
| "loss": 2.8893, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.44681383890900184, | |
| "eval_loss": 2.9129724502563477, | |
| "eval_runtime": 42.9613, | |
| "eval_samples_per_second": 150.903, | |
| "eval_steps_per_second": 2.537, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019172153939514704, | |
| "loss": 2.8923, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019144981749345602, | |
| "loss": 2.8998, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.000191178095591765, | |
| "loss": 2.8931, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019090637369007397, | |
| "loss": 2.8965, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019063465178838297, | |
| "loss": 2.8992, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019036292988669197, | |
| "loss": 2.8974, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019009120798500092, | |
| "loss": 2.8929, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018981948608330992, | |
| "loss": 2.8919, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001895477641816189, | |
| "loss": 2.8907, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001892760422799279, | |
| "loss": 2.8891, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.4481204235511882, | |
| "eval_loss": 2.9055044651031494, | |
| "eval_runtime": 43.4382, | |
| "eval_samples_per_second": 149.246, | |
| "eval_steps_per_second": 2.509, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018900432037823687, | |
| "loss": 2.8892, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018873259847654585, | |
| "loss": 2.8979, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018846087657485485, | |
| "loss": 2.8864, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001881891546731638, | |
| "loss": 2.8905, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001879174327714728, | |
| "loss": 2.8849, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001876457108697818, | |
| "loss": 2.8959, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018737398896809078, | |
| "loss": 2.8923, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018710226706639978, | |
| "loss": 2.8878, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018683326238372566, | |
| "loss": 2.8848, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018656154048203463, | |
| "loss": 2.8851, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.4484996960376006, | |
| "eval_loss": 2.90169358253479, | |
| "eval_runtime": 44.5924, | |
| "eval_samples_per_second": 145.384, | |
| "eval_steps_per_second": 2.444, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001862898185803436, | |
| "loss": 2.8892, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001860180966786526, | |
| "loss": 2.8835, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001857463747769616, | |
| "loss": 2.8868, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001854746528752706, | |
| "loss": 2.89, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018520293097357954, | |
| "loss": 2.8903, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018493120907188854, | |
| "loss": 2.8868, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018466220438921442, | |
| "loss": 2.8882, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018439048248752342, | |
| "loss": 2.8788, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001841187605858324, | |
| "loss": 2.8884, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001838470386841414, | |
| "loss": 2.8909, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.44834423666119233, | |
| "eval_loss": 2.9010777473449707, | |
| "eval_runtime": 43.3319, | |
| "eval_samples_per_second": 149.613, | |
| "eval_steps_per_second": 2.515, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001835753167824504, | |
| "loss": 2.8868, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018330359488075935, | |
| "loss": 2.8935, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018303187297906835, | |
| "loss": 2.883, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018276015107737733, | |
| "loss": 2.8895, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001824911463947032, | |
| "loss": 2.8958, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001822194244930122, | |
| "loss": 2.8916, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001819477025913212, | |
| "loss": 2.8949, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018167869790864708, | |
| "loss": 2.8898, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018140697600695606, | |
| "loss": 2.8887, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018113525410526506, | |
| "loss": 2.896, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.4478663654263186, | |
| "eval_loss": 2.9061102867126465, | |
| "eval_runtime": 43.1173, | |
| "eval_samples_per_second": 150.357, | |
| "eval_steps_per_second": 2.528, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018086353220357404, | |
| "loss": 2.8965, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.000180591810301883, | |
| "loss": 2.8969, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018032008840019201, | |
| "loss": 2.8913, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018004836649850096, | |
| "loss": 2.8897, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017977664459680996, | |
| "loss": 2.8952, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017950492269511897, | |
| "loss": 2.9008, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017923320079342794, | |
| "loss": 2.8884, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017896147889173694, | |
| "loss": 2.8971, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001786897569900459, | |
| "loss": 2.8824, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001784180350883549, | |
| "loss": 2.8918, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.44788874673731904, | |
| "eval_loss": 2.90425443649292, | |
| "eval_runtime": 45.928, | |
| "eval_samples_per_second": 141.156, | |
| "eval_steps_per_second": 2.373, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017814631318666387, | |
| "loss": 2.886, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017787459128497285, | |
| "loss": 2.8935, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017760286938328185, | |
| "loss": 2.8851, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017733114748159082, | |
| "loss": 2.8869, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017705942557989983, | |
| "loss": 2.8816, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017678770367820877, | |
| "loss": 2.8726, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017651598177651778, | |
| "loss": 2.8815, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017624425987482678, | |
| "loss": 2.8835, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017597253797313575, | |
| "loss": 2.8814, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017570081607144473, | |
| "loss": 2.8847, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.4490059975864478, | |
| "eval_loss": 2.89544415473938, | |
| "eval_runtime": 42.9804, | |
| "eval_samples_per_second": 150.836, | |
| "eval_steps_per_second": 2.536, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001754290941697537, | |
| "loss": 2.8699, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001751573722680627, | |
| "loss": 2.8829, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017488565036637165, | |
| "loss": 2.8773, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017461392846468066, | |
| "loss": 2.8812, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017434220656298966, | |
| "loss": 2.8805, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017407048466129863, | |
| "loss": 2.8812, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017379876275960764, | |
| "loss": 2.8826, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017352704085791659, | |
| "loss": 2.8801, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017325803617524252, | |
| "loss": 2.8787, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017298631427355146, | |
| "loss": 2.8749, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.44940160238088755, | |
| "eval_loss": 2.8912456035614014, | |
| "eval_runtime": 43.8328, | |
| "eval_samples_per_second": 147.903, | |
| "eval_steps_per_second": 2.487, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017271730959087737, | |
| "loss": 2.8715, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017244558768918637, | |
| "loss": 2.8804, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017217386578749535, | |
| "loss": 2.8802, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017190214388580432, | |
| "loss": 2.8779, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017163042198411332, | |
| "loss": 2.878, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017135870008242227, | |
| "loss": 2.8835, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017108697818073127, | |
| "loss": 2.8758, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017081525627904025, | |
| "loss": 2.8751, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017054353437734925, | |
| "loss": 2.8737, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017027181247565825, | |
| "loss": 2.8832, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.4496018243792967, | |
| "eval_loss": 2.891221761703491, | |
| "eval_runtime": 43.1479, | |
| "eval_samples_per_second": 150.251, | |
| "eval_steps_per_second": 2.526, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001700000905739672, | |
| "loss": 2.8757, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001697283686722762, | |
| "loss": 2.8725, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016945664677058518, | |
| "loss": 2.8749, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016918492486889416, | |
| "loss": 2.8747, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016891320296720316, | |
| "loss": 2.8724, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016864148106551213, | |
| "loss": 2.8717, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016836975916382114, | |
| "loss": 2.8653, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016809803726213008, | |
| "loss": 2.869, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016782631536043909, | |
| "loss": 2.8763, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001675545934587481, | |
| "loss": 2.8745, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.45002646438800725, | |
| "eval_loss": 2.8852970600128174, | |
| "eval_runtime": 43.6365, | |
| "eval_samples_per_second": 148.568, | |
| "eval_steps_per_second": 2.498, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016728287155705706, | |
| "loss": 2.8753, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016701114965536604, | |
| "loss": 2.8684, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016673942775367501, | |
| "loss": 2.8711, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016646770585198402, | |
| "loss": 2.8646, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016619598395029296, | |
| "loss": 2.865, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001659269792676189, | |
| "loss": 2.8773, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016565525736592787, | |
| "loss": 2.8703, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016538353546423687, | |
| "loss": 2.8722, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016511181356254582, | |
| "loss": 2.8713, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016484009166085482, | |
| "loss": 2.8717, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.45021942758284866, | |
| "eval_loss": 2.8834283351898193, | |
| "eval_runtime": 43.5477, | |
| "eval_samples_per_second": 148.871, | |
| "eval_steps_per_second": 2.503, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016456836975916383, | |
| "loss": 2.8727, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016429664785747277, | |
| "loss": 2.8622, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016402492595578178, | |
| "loss": 2.8707, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016375320405409075, | |
| "loss": 2.8645, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016348148215239975, | |
| "loss": 2.8642, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016321247746972563, | |
| "loss": 2.8679, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016294075556803463, | |
| "loss": 2.871, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001626690336663436, | |
| "loss": 2.867, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016239731176465258, | |
| "loss": 2.8643, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016212558986296156, | |
| "loss": 2.8659, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.45029624992060685, | |
| "eval_loss": 2.883072853088379, | |
| "eval_runtime": 43.5545, | |
| "eval_samples_per_second": 148.848, | |
| "eval_steps_per_second": 2.503, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016185386796127056, | |
| "loss": 2.8694, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016158214605957956, | |
| "loss": 2.8671, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001613104241578885, | |
| "loss": 2.8624, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016103870225619751, | |
| "loss": 2.8665, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001607669803545065, | |
| "loss": 2.8613, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016049525845281547, | |
| "loss": 2.8637, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016022353655112447, | |
| "loss": 2.8662, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015995181464943344, | |
| "loss": 2.8652, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015968009274774245, | |
| "loss": 2.8673, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001594083708460514, | |
| "loss": 2.865, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.450486793514259, | |
| "eval_loss": 2.878352403640747, | |
| "eval_runtime": 43.3417, | |
| "eval_samples_per_second": 149.579, | |
| "eval_steps_per_second": 2.515, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001591366489443604, | |
| "loss": 2.8688, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015886492704266937, | |
| "loss": 2.862, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015859320514097837, | |
| "loss": 2.8646, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015832148323928735, | |
| "loss": 2.8672, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015804976133759632, | |
| "loss": 2.8594, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015777803943590533, | |
| "loss": 2.8558, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001575063175342143, | |
| "loss": 2.8576, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001572373128515402, | |
| "loss": 2.8597, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015696559094984918, | |
| "loss": 2.8615, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015669386904815818, | |
| "loss": 2.8575, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.45082372297985984, | |
| "eval_loss": 2.8763039112091064, | |
| "eval_runtime": 43.6525, | |
| "eval_samples_per_second": 148.514, | |
| "eval_steps_per_second": 2.497, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015642214714646713, | |
| "loss": 2.8673, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015615042524477613, | |
| "loss": 2.854, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015587870334308514, | |
| "loss": 2.8652, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.000155609698660411, | |
| "loss": 2.8596, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015533797675872, | |
| "loss": 2.8641, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.000155066254857029, | |
| "loss": 2.8595, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.000154794532955338, | |
| "loss": 2.8562, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015452281105364694, | |
| "loss": 2.8529, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015425108915195594, | |
| "loss": 2.8629, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015397936725026492, | |
| "loss": 2.8571, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.4512689295986789, | |
| "eval_loss": 2.874122142791748, | |
| "eval_runtime": 43.0942, | |
| "eval_samples_per_second": 150.438, | |
| "eval_steps_per_second": 2.529, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001537076453485739, | |
| "loss": 2.8605, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015343592344688287, | |
| "loss": 2.8668, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015316420154519187, | |
| "loss": 2.8604, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015289247964350087, | |
| "loss": 2.857, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015262075774180982, | |
| "loss": 2.8599, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015234903584011882, | |
| "loss": 2.8653, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001520773139384278, | |
| "loss": 2.857, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001518055920367368, | |
| "loss": 2.8543, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015153658735406268, | |
| "loss": 2.8495, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015126486545237168, | |
| "loss": 2.8554, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.4514479800866822, | |
| "eval_loss": 2.870398998260498, | |
| "eval_runtime": 43.838, | |
| "eval_samples_per_second": 147.885, | |
| "eval_steps_per_second": 2.486, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015099314355068063, | |
| "loss": 2.8595, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015072142164898963, | |
| "loss": 2.855, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001504496997472986, | |
| "loss": 2.8663, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001501779778456076, | |
| "loss": 2.8555, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014990625594391658, | |
| "loss": 2.8596, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014963453404222556, | |
| "loss": 2.8589, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014936281214053456, | |
| "loss": 2.8568, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014909109023884354, | |
| "loss": 2.8474, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001488193683371525, | |
| "loss": 2.8515, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014854764643546151, | |
| "loss": 2.8526, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.45189379160579857, | |
| "eval_loss": 2.86692214012146, | |
| "eval_runtime": 43.3506, | |
| "eval_samples_per_second": 149.548, | |
| "eval_steps_per_second": 2.514, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001482759245337705, | |
| "loss": 2.8504, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001480042026320795, | |
| "loss": 2.854, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014773248073038847, | |
| "loss": 2.8512, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014746075882869744, | |
| "loss": 2.8515, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014718903692700642, | |
| "loss": 2.8492, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014691731502531542, | |
| "loss": 2.8491, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001466455931236244, | |
| "loss": 2.8466, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001463738712219334, | |
| "loss": 2.8508, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014610214932024237, | |
| "loss": 2.8567, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014583042741855135, | |
| "loss": 2.8521, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.45249203799983667, | |
| "eval_loss": 2.861818552017212, | |
| "eval_runtime": 43.168, | |
| "eval_samples_per_second": 150.181, | |
| "eval_steps_per_second": 2.525, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014555870551686032, | |
| "loss": 2.8463, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001452869836151693, | |
| "loss": 2.8433, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001450152617134783, | |
| "loss": 2.8446, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014474353981178728, | |
| "loss": 2.8477, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014447181791009628, | |
| "loss": 2.8439, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014420009600840525, | |
| "loss": 2.8459, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014392837410671423, | |
| "loss": 2.8445, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001436566522050232, | |
| "loss": 2.8455, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001433876475223491, | |
| "loss": 2.8474, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.000143118642839675, | |
| "loss": 2.8398, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.45218656334969587, | |
| "eval_loss": 2.8599517345428467, | |
| "eval_runtime": 43.8444, | |
| "eval_samples_per_second": 147.864, | |
| "eval_steps_per_second": 2.486, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.000142846920937984, | |
| "loss": 2.8492, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014257519903629296, | |
| "loss": 2.8434, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014230347713460197, | |
| "loss": 2.8483, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014203175523291094, | |
| "loss": 2.8441, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014176003333121992, | |
| "loss": 2.8474, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014148831142952892, | |
| "loss": 2.8385, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001412165895278379, | |
| "loss": 2.8424, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014094486762614687, | |
| "loss": 2.847, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014067314572445587, | |
| "loss": 2.8511, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014040142382276485, | |
| "loss": 2.8398, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.45275395982857125, | |
| "eval_loss": 2.8576090335845947, | |
| "eval_runtime": 43.2028, | |
| "eval_samples_per_second": 150.06, | |
| "eval_steps_per_second": 2.523, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014012970192107382, | |
| "loss": 2.8386, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013985798001938282, | |
| "loss": 2.8458, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001395862581176918, | |
| "loss": 2.8356, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013931453621600078, | |
| "loss": 2.8379, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013904281431430978, | |
| "loss": 2.8325, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013877109241261875, | |
| "loss": 2.8461, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013849937051092773, | |
| "loss": 2.8521, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013823036582825363, | |
| "loss": 2.8273, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013795864392656263, | |
| "loss": 2.8318, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001376869220248716, | |
| "loss": 2.837, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.4528289674654375, | |
| "eval_loss": 2.8535568714141846, | |
| "eval_runtime": 43.1874, | |
| "eval_samples_per_second": 150.113, | |
| "eval_steps_per_second": 2.524, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013741520012318058, | |
| "loss": 2.8396, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013714347822148956, | |
| "loss": 2.8395, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013687447353881546, | |
| "loss": 2.8325, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013660275163712444, | |
| "loss": 2.8412, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013633102973543344, | |
| "loss": 2.8392, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013605930783374242, | |
| "loss": 2.843, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001357875859320514, | |
| "loss": 2.8337, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013551586403036037, | |
| "loss": 2.8452, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013524414212866937, | |
| "loss": 2.8448, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013497242022697835, | |
| "loss": 2.837, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.4534701617805845, | |
| "eval_loss": 2.851900577545166, | |
| "eval_runtime": 43.1282, | |
| "eval_samples_per_second": 150.319, | |
| "eval_steps_per_second": 2.527, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013470069832528735, | |
| "loss": 2.8331, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013442897642359632, | |
| "loss": 2.832, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001341572545219053, | |
| "loss": 2.8255, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013388553262021427, | |
| "loss": 2.8327, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013361381071852328, | |
| "loss": 2.8386, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013334208881683225, | |
| "loss": 2.8315, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013307036691514125, | |
| "loss": 2.824, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013279864501345023, | |
| "loss": 2.8296, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001325269231117592, | |
| "loss": 2.8378, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001322579184290851, | |
| "loss": 2.8427, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.4535663409278566, | |
| "eval_loss": 2.8492891788482666, | |
| "eval_runtime": 43.4858, | |
| "eval_samples_per_second": 149.083, | |
| "eval_steps_per_second": 2.507, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013198619652739408, | |
| "loss": 2.8329, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013171447462570306, | |
| "loss": 2.8389, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013144275272401206, | |
| "loss": 2.8358, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013117103082232104, | |
| "loss": 2.8369, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013089930892063, | |
| "loss": 2.8294, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000130627587018939, | |
| "loss": 2.834, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000130355865117248, | |
| "loss": 2.8414, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013008414321555696, | |
| "loss": 2.8384, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012981242131386597, | |
| "loss": 2.8384, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012954069941217494, | |
| "loss": 2.8365, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.45409986299008265, | |
| "eval_loss": 2.8467965126037598, | |
| "eval_runtime": 47.1796, | |
| "eval_samples_per_second": 137.411, | |
| "eval_steps_per_second": 2.31, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012926897751048392, | |
| "loss": 2.8281, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012899725560879292, | |
| "loss": 2.8197, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001287255337071019, | |
| "loss": 2.8233, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012845652902442777, | |
| "loss": 2.828, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012818480712273677, | |
| "loss": 2.8334, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012791308522104578, | |
| "loss": 2.8332, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012764136331935475, | |
| "loss": 2.8279, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012736964141766373, | |
| "loss": 2.8271, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001270979195159727, | |
| "loss": 2.8306, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012682619761428168, | |
| "loss": 2.8327, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.4538736302788893, | |
| "eval_loss": 2.8447225093841553, | |
| "eval_runtime": 44.4204, | |
| "eval_samples_per_second": 145.946, | |
| "eval_steps_per_second": 2.454, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012655447571259068, | |
| "loss": 2.836, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012628275381089965, | |
| "loss": 2.8337, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012601103190920866, | |
| "loss": 2.8333, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012573931000751763, | |
| "loss": 2.8298, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001254675881058266, | |
| "loss": 2.8285, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012519586620413558, | |
| "loss": 2.8252, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012492414430244459, | |
| "loss": 2.8227, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012465242240075356, | |
| "loss": 2.8286, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012438070049906256, | |
| "loss": 2.8218, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012410897859737154, | |
| "loss": 2.8289, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.4545583774154425, | |
| "eval_loss": 2.838773012161255, | |
| "eval_runtime": 43.8892, | |
| "eval_samples_per_second": 147.713, | |
| "eval_steps_per_second": 2.484, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001238372566956805, | |
| "loss": 2.8198, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001235655347939895, | |
| "loss": 2.8207, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012329381289229846, | |
| "loss": 2.8296, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012302209099060747, | |
| "loss": 2.8293, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012275036908891647, | |
| "loss": 2.8188, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012247864718722544, | |
| "loss": 2.819, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012220692528553442, | |
| "loss": 2.8219, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001219352033838434, | |
| "loss": 2.8199, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0001216634814821524, | |
| "loss": 2.8282, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012139175958046137, | |
| "loss": 2.8166, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.45473863770404044, | |
| "eval_loss": 2.834634780883789, | |
| "eval_runtime": 43.1108, | |
| "eval_samples_per_second": 150.38, | |
| "eval_steps_per_second": 2.528, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012112003767877036, | |
| "loss": 2.8226, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012084831577707934, | |
| "loss": 2.8135, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012057659387538832, | |
| "loss": 2.8134, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0001203048719736973, | |
| "loss": 2.8214, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0001200358672910232, | |
| "loss": 2.8142, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011976414538933219, | |
| "loss": 2.8196, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011949242348764117, | |
| "loss": 2.8145, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011922070158595016, | |
| "loss": 2.8093, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011894897968425913, | |
| "loss": 2.8168, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011867725778256813, | |
| "loss": 2.8171, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.45580810142968187, | |
| "eval_loss": 2.8293869495391846, | |
| "eval_runtime": 44.4137, | |
| "eval_samples_per_second": 145.968, | |
| "eval_steps_per_second": 2.454, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011840553588087711, | |
| "loss": 2.8123, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0001181338139791861, | |
| "loss": 2.8121, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011786209207749507, | |
| "loss": 2.8083, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011759037017580405, | |
| "loss": 2.8156, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011731864827411304, | |
| "loss": 2.8225, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011704692637242204, | |
| "loss": 2.8109, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011677520447073102, | |
| "loss": 2.8137, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0001165061997880569, | |
| "loss": 2.8097, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011623447788636588, | |
| "loss": 2.8099, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011596275598467488, | |
| "loss": 2.8184, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.4556344950443543, | |
| "eval_loss": 2.826944589614868, | |
| "eval_runtime": 43.7297, | |
| "eval_samples_per_second": 148.252, | |
| "eval_steps_per_second": 2.493, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011569103408298386, | |
| "loss": 2.8164, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011541931218129285, | |
| "loss": 2.8137, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011514759027960182, | |
| "loss": 2.8168, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011487858559692771, | |
| "loss": 2.8156, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011460686369523672, | |
| "loss": 2.8114, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011433514179354569, | |
| "loss": 2.8066, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011406341989185468, | |
| "loss": 2.8124, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011379169799016366, | |
| "loss": 2.8093, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011351997608847263, | |
| "loss": 2.8131, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011324825418678162, | |
| "loss": 2.8102, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.45632710588477254, | |
| "eval_loss": 2.8243494033813477, | |
| "eval_runtime": 42.7646, | |
| "eval_samples_per_second": 151.597, | |
| "eval_steps_per_second": 2.549, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011297653228509062, | |
| "loss": 2.8064, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001127048103833996, | |
| "loss": 2.8075, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011243308848170857, | |
| "loss": 2.8146, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011216136658001756, | |
| "loss": 2.8166, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011188964467832654, | |
| "loss": 2.8073, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011161792277663554, | |
| "loss": 2.8116, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011134620087494451, | |
| "loss": 2.807, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001110744789732535, | |
| "loss": 2.8066, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011080547429057939, | |
| "loss": 2.8101, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011053375238888837, | |
| "loss": 2.8153, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.45636279500231375, | |
| "eval_loss": 2.821134328842163, | |
| "eval_runtime": 42.931, | |
| "eval_samples_per_second": 151.01, | |
| "eval_steps_per_second": 2.539, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011026203048719737, | |
| "loss": 2.8109, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00010999030858550635, | |
| "loss": 2.8025, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010971858668381533, | |
| "loss": 2.8055, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010944686478212431, | |
| "loss": 2.8047, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010917514288043329, | |
| "loss": 2.8095, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010890342097874227, | |
| "loss": 2.805, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010863169907705128, | |
| "loss": 2.8079, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010835997717536025, | |
| "loss": 2.8071, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010809097249268614, | |
| "loss": 2.8016, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010781925059099512, | |
| "loss": 2.8035, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.4569090199707833, | |
| "eval_loss": 2.8184897899627686, | |
| "eval_runtime": 43.5955, | |
| "eval_samples_per_second": 148.708, | |
| "eval_steps_per_second": 2.5, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010755024590832102, | |
| "loss": 2.8002, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010727852400663001, | |
| "loss": 2.8186, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010700680210493899, | |
| "loss": 2.8036, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010673508020324797, | |
| "loss": 2.8077, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010646335830155695, | |
| "loss": 2.8111, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010619163639986595, | |
| "loss": 2.8018, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010591991449817493, | |
| "loss": 2.8079, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010564819259648392, | |
| "loss": 2.8124, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010537647069479289, | |
| "loss": 2.807, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010510474879310187, | |
| "loss": 2.8042, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.4569186983755403, | |
| "eval_loss": 2.8206183910369873, | |
| "eval_runtime": 44.1793, | |
| "eval_samples_per_second": 146.743, | |
| "eval_steps_per_second": 2.467, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010483302689141086, | |
| "loss": 2.8066, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010456130498971986, | |
| "loss": 2.8088, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010428958308802883, | |
| "loss": 2.8036, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010401786118633781, | |
| "loss": 2.7985, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001037461392846468, | |
| "loss": 2.7981, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010347441738295577, | |
| "loss": 2.7993, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010320269548126476, | |
| "loss": 2.7999, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010293097357957375, | |
| "loss": 2.8009, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010265925167788274, | |
| "loss": 2.7943, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010238752977619171, | |
| "loss": 2.7984, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.457420160722009, | |
| "eval_loss": 2.8137617111206055, | |
| "eval_runtime": 43.507, | |
| "eval_samples_per_second": 149.01, | |
| "eval_steps_per_second": 2.505, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001021158078745007, | |
| "loss": 2.7913, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010184408597280968, | |
| "loss": 2.8016, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010157236407111868, | |
| "loss": 2.7988, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010130064216942766, | |
| "loss": 2.792, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010103163748675355, | |
| "loss": 2.7926, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010075991558506253, | |
| "loss": 2.7796, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010048819368337151, | |
| "loss": 2.7971, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010021647178168051, | |
| "loss": 2.7974, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.994474987998949e-05, | |
| "loss": 2.7951, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.967302797829848e-05, | |
| "loss": 2.7883, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.45740261861338705, | |
| "eval_loss": 2.8111917972564697, | |
| "eval_runtime": 44.0953, | |
| "eval_samples_per_second": 147.023, | |
| "eval_steps_per_second": 2.472, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.940130607660745e-05, | |
| "loss": 2.7898, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.912958417491643e-05, | |
| "loss": 2.7914, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.885786227322542e-05, | |
| "loss": 2.798, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.85861403715344e-05, | |
| "loss": 2.7938, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.83144184698434e-05, | |
| "loss": 2.7927, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.804269656815237e-05, | |
| "loss": 2.7967, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.777369188547826e-05, | |
| "loss": 2.7933, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.750196998378726e-05, | |
| "loss": 2.7913, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.723024808209624e-05, | |
| "loss": 2.7924, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.695852618040523e-05, | |
| "loss": 2.7962, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.4583686443881887, | |
| "eval_loss": 2.8055942058563232, | |
| "eval_runtime": 44.8912, | |
| "eval_samples_per_second": 144.416, | |
| "eval_steps_per_second": 2.428, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.66868042787142e-05, | |
| "loss": 2.7848, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.641779959604009e-05, | |
| "loss": 2.7935, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.61460776943491e-05, | |
| "loss": 2.7961, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.587435579265807e-05, | |
| "loss": 2.788, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.560263389096706e-05, | |
| "loss": 2.7934, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.533091198927603e-05, | |
| "loss": 2.7888, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.505919008758501e-05, | |
| "loss": 2.7954, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.4787468185894e-05, | |
| "loss": 2.7934, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.451574628420299e-05, | |
| "loss": 2.7867, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.424402438251197e-05, | |
| "loss": 2.7937, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.4582416153257539, | |
| "eval_loss": 2.8068454265594482, | |
| "eval_runtime": 44.3778, | |
| "eval_samples_per_second": 146.087, | |
| "eval_steps_per_second": 2.456, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.397230248082095e-05, | |
| "loss": 2.7933, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.370058057912994e-05, | |
| "loss": 2.7876, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.342885867743891e-05, | |
| "loss": 2.7885, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.31571367757479e-05, | |
| "loss": 2.7859, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.288541487405689e-05, | |
| "loss": 2.7867, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.261369297236588e-05, | |
| "loss": 2.7882, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.234197107067486e-05, | |
| "loss": 2.7874, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.207024916898384e-05, | |
| "loss": 2.79, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.179852726729282e-05, | |
| "loss": 2.7828, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.152680536560182e-05, | |
| "loss": 2.7853, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.4587721128864935, | |
| "eval_loss": 2.801090955734253, | |
| "eval_runtime": 43.1479, | |
| "eval_samples_per_second": 150.251, | |
| "eval_steps_per_second": 2.526, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.12550834639108e-05, | |
| "loss": 2.7861, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.098336156221979e-05, | |
| "loss": 2.793, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.071163966052876e-05, | |
| "loss": 2.7914, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.043991775883774e-05, | |
| "loss": 2.7774, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.016819585714673e-05, | |
| "loss": 2.7791, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.989647395545573e-05, | |
| "loss": 2.7837, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.96247520537647e-05, | |
| "loss": 2.779, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.935303015207368e-05, | |
| "loss": 2.7807, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.908130825038267e-05, | |
| "loss": 2.7832, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.880958634869164e-05, | |
| "loss": 2.7798, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.4596697849276993, | |
| "eval_loss": 2.795370578765869, | |
| "eval_runtime": 43.9941, | |
| "eval_samples_per_second": 147.361, | |
| "eval_steps_per_second": 2.478, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.853786444700063e-05, | |
| "loss": 2.7851, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.826885976432654e-05, | |
| "loss": 2.7819, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.799713786263551e-05, | |
| "loss": 2.7767, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.77254159609445e-05, | |
| "loss": 2.7745, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.745369405925347e-05, | |
| "loss": 2.7807, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.718197215756246e-05, | |
| "loss": 2.7828, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.691025025587145e-05, | |
| "loss": 2.7768, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.663852835418044e-05, | |
| "loss": 2.7749, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.636680645248942e-05, | |
| "loss": 2.7782, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.60950845507984e-05, | |
| "loss": 2.7851, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.4597998384916206, | |
| "eval_loss": 2.7913172245025635, | |
| "eval_runtime": 43.6998, | |
| "eval_samples_per_second": 148.353, | |
| "eval_steps_per_second": 2.494, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.582336264910738e-05, | |
| "loss": 2.7722, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.555435796643328e-05, | |
| "loss": 2.7695, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.528535328375917e-05, | |
| "loss": 2.7732, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.501363138206815e-05, | |
| "loss": 2.7714, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.474190948037714e-05, | |
| "loss": 2.7739, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.447018757868613e-05, | |
| "loss": 2.7733, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.419846567699512e-05, | |
| "loss": 2.773, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.392674377530409e-05, | |
| "loss": 2.7754, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.365502187361308e-05, | |
| "loss": 2.7817, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.338329997192206e-05, | |
| "loss": 2.7831, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.46004845251381443, | |
| "eval_loss": 2.78973126411438, | |
| "eval_runtime": 44.9439, | |
| "eval_samples_per_second": 144.247, | |
| "eval_steps_per_second": 2.425, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.311157807023106e-05, | |
| "loss": 2.7739, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.283985616854003e-05, | |
| "loss": 2.781, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.256813426684902e-05, | |
| "loss": 2.7773, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.2296412365158e-05, | |
| "loss": 2.7688, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.202469046346699e-05, | |
| "loss": 2.7765, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.175568578079289e-05, | |
| "loss": 2.7735, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.148396387910187e-05, | |
| "loss": 2.7692, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.121224197741084e-05, | |
| "loss": 2.7661, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.094052007571983e-05, | |
| "loss": 2.7714, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.06687981740288e-05, | |
| "loss": 2.7773, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.4603297311520629, | |
| "eval_loss": 2.786165475845337, | |
| "eval_runtime": 45.3636, | |
| "eval_samples_per_second": 142.912, | |
| "eval_steps_per_second": 2.403, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.03970762723378e-05, | |
| "loss": 2.77, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.012535437064678e-05, | |
| "loss": 2.772, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.985363246895577e-05, | |
| "loss": 2.7751, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.958191056726475e-05, | |
| "loss": 2.7705, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.931018866557374e-05, | |
| "loss": 2.7711, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.903846676388271e-05, | |
| "loss": 2.7666, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.87667448621917e-05, | |
| "loss": 2.7678, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.84977401795176e-05, | |
| "loss": 2.7707, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.822601827782658e-05, | |
| "loss": 2.7624, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.795429637613557e-05, | |
| "loss": 2.7688, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.4608795855223163, | |
| "eval_loss": 2.7835707664489746, | |
| "eval_runtime": 44.1206, | |
| "eval_samples_per_second": 146.938, | |
| "eval_steps_per_second": 2.47, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.768257447444454e-05, | |
| "loss": 2.7652, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.741085257275354e-05, | |
| "loss": 2.763, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.713913067106252e-05, | |
| "loss": 2.7718, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.686740876937151e-05, | |
| "loss": 2.774, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.659568686768048e-05, | |
| "loss": 2.7624, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.632396496598946e-05, | |
| "loss": 2.7672, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.605224306429845e-05, | |
| "loss": 2.7646, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.578052116260744e-05, | |
| "loss": 2.7643, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.550879926091643e-05, | |
| "loss": 2.7636, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.523979457824232e-05, | |
| "loss": 2.7658, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.4610453282037788, | |
| "eval_loss": 2.7798171043395996, | |
| "eval_runtime": 44.7143, | |
| "eval_samples_per_second": 144.987, | |
| "eval_steps_per_second": 2.438, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.49680726765513e-05, | |
| "loss": 2.7694, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.469635077486028e-05, | |
| "loss": 2.7662, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.442734609218618e-05, | |
| "loss": 2.7624, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.415562419049516e-05, | |
| "loss": 2.7632, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.388390228880415e-05, | |
| "loss": 2.7697, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.361218038711314e-05, | |
| "loss": 2.7663, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.334045848542211e-05, | |
| "loss": 2.7623, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.306873658373109e-05, | |
| "loss": 2.7685, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.279701468204009e-05, | |
| "loss": 2.7702, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.252529278034907e-05, | |
| "loss": 2.7622, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.4611511857558078, | |
| "eval_loss": 2.781484603881836, | |
| "eval_runtime": 43.3638, | |
| "eval_samples_per_second": 149.503, | |
| "eval_steps_per_second": 2.514, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.225357087865804e-05, | |
| "loss": 2.7672, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.198184897696703e-05, | |
| "loss": 2.7652, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.171012707527602e-05, | |
| "loss": 2.7671, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.143840517358501e-05, | |
| "loss": 2.7621, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.11694004909109e-05, | |
| "loss": 2.7662, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.089767858921989e-05, | |
| "loss": 2.7684, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.062595668752886e-05, | |
| "loss": 2.7662, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.035423478583785e-05, | |
| "loss": 2.7638, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.008251288414684e-05, | |
| "loss": 2.7639, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.981079098245581e-05, | |
| "loss": 2.7691, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.46120986108464673, | |
| "eval_loss": 2.7783455848693848, | |
| "eval_runtime": 43.5919, | |
| "eval_samples_per_second": 148.72, | |
| "eval_steps_per_second": 2.5, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.95390690807648e-05, | |
| "loss": 2.7649, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.926734717907379e-05, | |
| "loss": 2.7638, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.899562527738277e-05, | |
| "loss": 2.7675, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.872390337569176e-05, | |
| "loss": 2.7657, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.845218147400074e-05, | |
| "loss": 2.7612, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.818045957230972e-05, | |
| "loss": 2.7682, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.79087376706187e-05, | |
| "loss": 2.7588, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.763701576892768e-05, | |
| "loss": 2.765, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.736529386723667e-05, | |
| "loss": 2.7556, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.709357196554565e-05, | |
| "loss": 2.7579, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.4619333218402277, | |
| "eval_loss": 2.7711987495422363, | |
| "eval_runtime": 43.3357, | |
| "eval_samples_per_second": 149.6, | |
| "eval_steps_per_second": 2.515, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.682185006385464e-05, | |
| "loss": 2.7538, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.655012816216363e-05, | |
| "loss": 2.7596, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.62784062604726e-05, | |
| "loss": 2.7512, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.600668435878159e-05, | |
| "loss": 2.7559, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.573496245709058e-05, | |
| "loss": 2.7574, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.546324055539957e-05, | |
| "loss": 2.7614, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.519423587272546e-05, | |
| "loss": 2.7501, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.492251397103445e-05, | |
| "loss": 2.7488, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.465079206934342e-05, | |
| "loss": 2.7497, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.437907016765241e-05, | |
| "loss": 2.7614, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.46246986840394033, | |
| "eval_loss": 2.7673110961914062, | |
| "eval_runtime": 43.038, | |
| "eval_samples_per_second": 150.634, | |
| "eval_steps_per_second": 2.533, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.41073482659614e-05, | |
| "loss": 2.7544, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.383834358328728e-05, | |
| "loss": 2.7546, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.356662168159627e-05, | |
| "loss": 2.7564, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.329489977990525e-05, | |
| "loss": 2.759, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.302317787821423e-05, | |
| "loss": 2.7586, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.275145597652322e-05, | |
| "loss": 2.7546, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.247973407483221e-05, | |
| "loss": 2.7548, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.220801217314118e-05, | |
| "loss": 2.7527, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.193629027145017e-05, | |
| "loss": 2.7607, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.166456836975916e-05, | |
| "loss": 2.7592, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.46232166783109974, | |
| "eval_loss": 2.7691469192504883, | |
| "eval_runtime": 43.5697, | |
| "eval_samples_per_second": 148.796, | |
| "eval_steps_per_second": 2.502, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.139284646806815e-05, | |
| "loss": 2.7481, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.112112456637712e-05, | |
| "loss": 2.7579, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.0849402664686106e-05, | |
| "loss": 2.7559, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.05776807629951e-05, | |
| "loss": 2.7515, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.030595886130408e-05, | |
| "loss": 2.7524, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.003423695961306e-05, | |
| "loss": 2.7395, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.976251505792205e-05, | |
| "loss": 2.7438, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.949079315623103e-05, | |
| "loss": 2.7468, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.921907125454001e-05, | |
| "loss": 2.7423, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.8947349352849e-05, | |
| "loss": 2.7551, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.4633808482516869, | |
| "eval_loss": 2.760658025741577, | |
| "eval_runtime": 43.7777, | |
| "eval_samples_per_second": 148.089, | |
| "eval_steps_per_second": 2.49, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.867562745115798e-05, | |
| "loss": 2.7352, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.8403905549466965e-05, | |
| "loss": 2.751, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.813490086679286e-05, | |
| "loss": 2.7456, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.7863178965101844e-05, | |
| "loss": 2.7491, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.759145706341083e-05, | |
| "loss": 2.7477, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.7319735161719815e-05, | |
| "loss": 2.7431, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.70480132600288e-05, | |
| "loss": 2.7406, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.6776291358337786e-05, | |
| "loss": 2.7444, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.650456945664677e-05, | |
| "loss": 2.7437, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.623284755495574e-05, | |
| "loss": 2.7397, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.4636597072887461, | |
| "eval_loss": 2.7578768730163574, | |
| "eval_runtime": 43.3807, | |
| "eval_samples_per_second": 149.444, | |
| "eval_steps_per_second": 2.513, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.596112565326473e-05, | |
| "loss": 2.7456, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.5689403751573714e-05, | |
| "loss": 2.7393, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.54176818498827e-05, | |
| "loss": 2.74, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.5145959948191685e-05, | |
| "loss": 2.7411, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.487695526551758e-05, | |
| "loss": 2.747, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.4605233363826564e-05, | |
| "loss": 2.741, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.433622868115246e-05, | |
| "loss": 2.7441, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.406450677946144e-05, | |
| "loss": 2.7447, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.3792784877770425e-05, | |
| "loss": 2.7517, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.3521062976079414e-05, | |
| "loss": 2.7357, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.4636022417605018, | |
| "eval_loss": 2.758023738861084, | |
| "eval_runtime": 43.2538, | |
| "eval_samples_per_second": 149.883, | |
| "eval_steps_per_second": 2.52, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.3249341074388396e-05, | |
| "loss": 2.7429, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.297761917269738e-05, | |
| "loss": 2.7445, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.270589727100637e-05, | |
| "loss": 2.7473, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.243417536931535e-05, | |
| "loss": 2.7404, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.216245346762434e-05, | |
| "loss": 2.7401, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.189073156593331e-05, | |
| "loss": 2.7441, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1619009664242295e-05, | |
| "loss": 2.737, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1347287762551284e-05, | |
| "loss": 2.7337, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1075565860860266e-05, | |
| "loss": 2.7422, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.080384395916925e-05, | |
| "loss": 2.7452, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.46426944678843307, | |
| "eval_loss": 2.751744031906128, | |
| "eval_runtime": 44.8905, | |
| "eval_samples_per_second": 144.418, | |
| "eval_steps_per_second": 2.428, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.0532122057478237e-05, | |
| "loss": 2.7387, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.026311737480413e-05, | |
| "loss": 2.7342, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.9991395473113116e-05, | |
| "loss": 2.7349, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.97196735714221e-05, | |
| "loss": 2.7388, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.944795166973108e-05, | |
| "loss": 2.7397, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.917622976804007e-05, | |
| "loss": 2.7352, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.890450786634905e-05, | |
| "loss": 2.7392, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.863278596465803e-05, | |
| "loss": 2.7419, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.836106406296702e-05, | |
| "loss": 2.738, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.8089342161276004e-05, | |
| "loss": 2.7418, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.46412548051767366, | |
| "eval_loss": 2.7533059120178223, | |
| "eval_runtime": 43.1643, | |
| "eval_samples_per_second": 150.193, | |
| "eval_steps_per_second": 2.525, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.781762025958498e-05, | |
| "loss": 2.7372, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.7545898357893974e-05, | |
| "loss": 2.7369, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.727417645620295e-05, | |
| "loss": 2.7331, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.700245455451193e-05, | |
| "loss": 2.7379, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.673073265282092e-05, | |
| "loss": 2.7341, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.64590107511299e-05, | |
| "loss": 2.7359, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.618728884943889e-05, | |
| "loss": 2.737, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5915566947747873e-05, | |
| "loss": 2.7343, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.564656226507377e-05, | |
| "loss": 2.7346, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.537484036338275e-05, | |
| "loss": 2.7379, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.46473280041617143, | |
| "eval_loss": 2.748091697692871, | |
| "eval_runtime": 43.4169, | |
| "eval_samples_per_second": 149.32, | |
| "eval_steps_per_second": 2.511, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5103118461691735e-05, | |
| "loss": 2.7341, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.4831396560000724e-05, | |
| "loss": 2.7431, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.4559674658309706e-05, | |
| "loss": 2.7347, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.428795275661869e-05, | |
| "loss": 2.7366, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.4016230854927676e-05, | |
| "loss": 2.7344, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.374450895323666e-05, | |
| "loss": 2.7382, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.347278705154564e-05, | |
| "loss": 2.7279, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.320106514985463e-05, | |
| "loss": 2.7307, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.292934324816361e-05, | |
| "loss": 2.7275, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.26603385654895e-05, | |
| "loss": 2.7308, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.4653649212268588, | |
| "eval_loss": 2.7459847927093506, | |
| "eval_runtime": 43.1356, | |
| "eval_samples_per_second": 150.294, | |
| "eval_steps_per_second": 2.527, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.2388616663798484e-05, | |
| "loss": 2.7304, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.211689476210747e-05, | |
| "loss": 2.7334, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1845172860416455e-05, | |
| "loss": 2.7324, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.157345095872544e-05, | |
| "loss": 2.7338, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1301729057034425e-05, | |
| "loss": 2.7334, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.103000715534341e-05, | |
| "loss": 2.7323, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.075828525365239e-05, | |
| "loss": 2.7338, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.048656335196138e-05, | |
| "loss": 2.73, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.021484145027036e-05, | |
| "loss": 2.7367, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.994311954857934e-05, | |
| "loss": 2.727, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.46549799929226665, | |
| "eval_loss": 2.740849018096924, | |
| "eval_runtime": 43.5693, | |
| "eval_samples_per_second": 148.797, | |
| "eval_steps_per_second": 2.502, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.967139764688833e-05, | |
| "loss": 2.7257, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.939967574519731e-05, | |
| "loss": 2.7251, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9127953843506295e-05, | |
| "loss": 2.7236, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.8856231941815284e-05, | |
| "loss": 2.7224, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.8584510040124266e-05, | |
| "loss": 2.7204, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.831278813843325e-05, | |
| "loss": 2.7249, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.804106623674224e-05, | |
| "loss": 2.7214, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.776934433505122e-05, | |
| "loss": 2.7242, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.74976224333602e-05, | |
| "loss": 2.7147, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.722861775068609e-05, | |
| "loss": 2.7282, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.4663823635269317, | |
| "eval_loss": 2.7350597381591797, | |
| "eval_runtime": 43.4285, | |
| "eval_samples_per_second": 149.28, | |
| "eval_steps_per_second": 2.51, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.695689584899508e-05, | |
| "loss": 2.718, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.668517394730406e-05, | |
| "loss": 2.7174, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6413452045613044e-05, | |
| "loss": 2.7205, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.614173014392203e-05, | |
| "loss": 2.7195, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5870008242231015e-05, | |
| "loss": 2.7172, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.559828634054e-05, | |
| "loss": 2.7128, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.532656443884898e-05, | |
| "loss": 2.7192, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.505484253715797e-05, | |
| "loss": 2.7191, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.478312063546695e-05, | |
| "loss": 2.7178, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.451139873377593e-05, | |
| "loss": 2.7133, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.46685176615764307, | |
| "eval_loss": 2.730079412460327, | |
| "eval_runtime": 43.3235, | |
| "eval_samples_per_second": 149.642, | |
| "eval_steps_per_second": 2.516, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.423967683208492e-05, | |
| "loss": 2.7164, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.39679549303939e-05, | |
| "loss": 2.7106, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.3696233028702885e-05, | |
| "loss": 2.715, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.3424511127011874e-05, | |
| "loss": 2.7091, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.3152789225320856e-05, | |
| "loss": 2.7093, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.288106732362984e-05, | |
| "loss": 2.7116, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.260934542193883e-05, | |
| "loss": 2.7172, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.233762352024781e-05, | |
| "loss": 2.7072, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.206590161855679e-05, | |
| "loss": 2.7165, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.179417971686577e-05, | |
| "loss": 2.7136, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.4673356863954899, | |
| "eval_loss": 2.7250616550445557, | |
| "eval_runtime": 43.1535, | |
| "eval_samples_per_second": 150.231, | |
| "eval_steps_per_second": 2.526, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.152245781517476e-05, | |
| "loss": 2.7117, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.1250735913483744e-05, | |
| "loss": 2.7099, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0979014011792726e-05, | |
| "loss": 2.715, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0707292110101715e-05, | |
| "loss": 2.7119, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0435570208410697e-05, | |
| "loss": 2.7136, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.016384830671968e-05, | |
| "loss": 2.7069, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9892126405028664e-05, | |
| "loss": 2.7092, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.962040450333765e-05, | |
| "loss": 2.7052, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.934868260164663e-05, | |
| "loss": 2.7099, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9076960699955617e-05, | |
| "loss": 2.7108, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.46786981335801325, | |
| "eval_loss": 2.7208478450775146, | |
| "eval_runtime": 43.4331, | |
| "eval_samples_per_second": 149.264, | |
| "eval_steps_per_second": 2.51, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8807956017281514e-05, | |
| "loss": 2.7137, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8536234115590493e-05, | |
| "loss": 2.7069, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8264512213899478e-05, | |
| "loss": 2.698, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.7992790312208464e-05, | |
| "loss": 2.7027, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.7721068410517446e-05, | |
| "loss": 2.7062, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.744934650882643e-05, | |
| "loss": 2.7064, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.718034182615232e-05, | |
| "loss": 2.7059, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.691133714347822e-05, | |
| "loss": 2.7146, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.6639615241787204e-05, | |
| "loss": 2.7036, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.6367893340096186e-05, | |
| "loss": 2.7051, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.46807245495761163, | |
| "eval_loss": 2.7191717624664307, | |
| "eval_runtime": 43.4633, | |
| "eval_samples_per_second": 149.16, | |
| "eval_steps_per_second": 2.508, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.609617143840517e-05, | |
| "loss": 2.7007, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.5824449536714157e-05, | |
| "loss": 2.7024, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.555272763502314e-05, | |
| "loss": 2.7027, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.5281005733332124e-05, | |
| "loss": 2.7082, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.500928383164111e-05, | |
| "loss": 2.7067, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.4737561929950092e-05, | |
| "loss": 2.7044, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.4465840028259074e-05, | |
| "loss": 2.705, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.419411812656806e-05, | |
| "loss": 2.7069, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3922396224877045e-05, | |
| "loss": 2.7005, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3650674323186027e-05, | |
| "loss": 2.7013, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.4687317962816779, | |
| "eval_loss": 2.7151107788085938, | |
| "eval_runtime": 43.2863, | |
| "eval_samples_per_second": 149.77, | |
| "eval_steps_per_second": 2.518, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3378952421495012e-05, | |
| "loss": 2.7029, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3107230519803998e-05, | |
| "loss": 2.7007, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.283550861811298e-05, | |
| "loss": 2.7089, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.2563786716421965e-05, | |
| "loss": 2.7018, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.229206481473095e-05, | |
| "loss": 2.6984, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.202306013205684e-05, | |
| "loss": 2.7011, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1751338230365826e-05, | |
| "loss": 2.6968, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1479616328674812e-05, | |
| "loss": 2.701, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1207894426983794e-05, | |
| "loss": 2.7079, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.093617252529278e-05, | |
| "loss": 2.6996, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.46891387127116774, | |
| "eval_loss": 2.7129361629486084, | |
| "eval_runtime": 43.7353, | |
| "eval_samples_per_second": 148.233, | |
| "eval_steps_per_second": 2.492, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.0664450623601765e-05, | |
| "loss": 2.6985, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.0392728721910743e-05, | |
| "loss": 2.6945, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.012100682021973e-05, | |
| "loss": 2.6988, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.9849284918528714e-05, | |
| "loss": 2.701, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9577563016837696e-05, | |
| "loss": 2.7044, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.930584111514668e-05, | |
| "loss": 2.6897, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9034119213455667e-05, | |
| "loss": 2.6993, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8762397311764652e-05, | |
| "loss": 2.6978, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8490675410073634e-05, | |
| "loss": 2.6965, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.821895350838262e-05, | |
| "loss": 2.6898, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.46940021111020375, | |
| "eval_loss": 2.7084131240844727, | |
| "eval_runtime": 44.0036, | |
| "eval_samples_per_second": 147.329, | |
| "eval_steps_per_second": 2.477, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7947231606691602e-05, | |
| "loss": 2.6918, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7675509705000587e-05, | |
| "loss": 2.6941, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7403787803309573e-05, | |
| "loss": 2.6954, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7132065901618555e-05, | |
| "loss": 2.7015, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.686034399992754e-05, | |
| "loss": 2.698, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6588622098236522e-05, | |
| "loss": 2.6922, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6319617415562416e-05, | |
| "loss": 2.6932, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.60478955138714e-05, | |
| "loss": 2.6887, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5776173612180387e-05, | |
| "loss": 2.6887, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.550445171048937e-05, | |
| "loss": 2.688, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.4697316964731288, | |
| "eval_loss": 2.705327272415161, | |
| "eval_runtime": 43.7246, | |
| "eval_samples_per_second": 148.269, | |
| "eval_steps_per_second": 2.493, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5232729808798354e-05, | |
| "loss": 2.6933, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4961007907107338e-05, | |
| "loss": 2.6992, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.468928600541632e-05, | |
| "loss": 2.6943, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4417564103725306e-05, | |
| "loss": 2.6919, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.414584220203429e-05, | |
| "loss": 2.6961, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.3874120300343275e-05, | |
| "loss": 2.6942, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3602398398652258e-05, | |
| "loss": 2.6936, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3330676496961242e-05, | |
| "loss": 2.6851, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3058954595270228e-05, | |
| "loss": 2.6929, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.278723269357921e-05, | |
| "loss": 2.6855, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.4701273012675686, | |
| "eval_loss": 2.701770305633545, | |
| "eval_runtime": 44.1379, | |
| "eval_samples_per_second": 146.881, | |
| "eval_steps_per_second": 2.47, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2515510791888195e-05, | |
| "loss": 2.6922, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2243788890197179e-05, | |
| "loss": 2.6811, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1972066988506163e-05, | |
| "loss": 2.6819, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1700345086815148e-05, | |
| "loss": 2.6882, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.142862318512413e-05, | |
| "loss": 2.685, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1159618502450025e-05, | |
| "loss": 2.6841, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.0887896600759008e-05, | |
| "loss": 2.6806, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0616174699067993e-05, | |
| "loss": 2.6896, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0344452797376977e-05, | |
| "loss": 2.6807, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0072730895685962e-05, | |
| "loss": 2.6852, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.4704999198507106, | |
| "eval_loss": 2.698939085006714, | |
| "eval_runtime": 43.9086, | |
| "eval_samples_per_second": 147.648, | |
| "eval_steps_per_second": 2.482, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.803726213011856e-06, | |
| "loss": 2.6861, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.53200431132084e-06, | |
| "loss": 2.6886, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.260282409629823e-06, | |
| "loss": 2.6872, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.988560507938807e-06, | |
| "loss": 2.685, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.71683860624779e-06, | |
| "loss": 2.6892, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.445116704556776e-06, | |
| "loss": 2.6815, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.17339480286576e-06, | |
| "loss": 2.6879, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 7.901672901174744e-06, | |
| "loss": 2.6822, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.629950999483727e-06, | |
| "loss": 2.6806, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.360946316809621e-06, | |
| "loss": 2.689, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.4705204864608191, | |
| "eval_loss": 2.6981818675994873, | |
| "eval_runtime": 43.1633, | |
| "eval_samples_per_second": 150.197, | |
| "eval_steps_per_second": 2.525, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.089224415118606e-06, | |
| "loss": 2.6872, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.81750251342759e-06, | |
| "loss": 2.6962, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.545780611736574e-06, | |
| "loss": 2.6831, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.274058710045559e-06, | |
| "loss": 2.6877, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.0023368083545415e-06, | |
| "loss": 2.6956, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.730614906663526e-06, | |
| "loss": 2.6936, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.458893004972511e-06, | |
| "loss": 2.6864, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.187171103281495e-06, | |
| "loss": 2.6838, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.915449201590478e-06, | |
| "loss": 2.6867, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.643727299899463e-06, | |
| "loss": 2.6868, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.4707297819636878, | |
| "eval_loss": 2.6994001865386963, | |
| "eval_runtime": 43.0302, | |
| "eval_samples_per_second": 150.662, | |
| "eval_steps_per_second": 2.533, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.3720053982084465e-06, | |
| "loss": 2.689, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.10028349651743e-06, | |
| "loss": 2.6831, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.831278813843325e-06, | |
| "loss": 2.6825, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.559556912152309e-06, | |
| "loss": 2.6851, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.2878350104612927e-06, | |
| "loss": 2.6798, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.016113108770277e-06, | |
| "loss": 2.6773, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.744391207079261e-06, | |
| "loss": 2.6829, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.472669305388245e-06, | |
| "loss": 2.6819, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.2036646227141394e-06, | |
| "loss": 2.6827, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.931942721023123e-06, | |
| "loss": 2.6901, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.47069106834466007, | |
| "eval_loss": 2.700648307800293, | |
| "eval_runtime": 43.0535, | |
| "eval_samples_per_second": 150.58, | |
| "eval_steps_per_second": 2.532, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.6602208193321073e-06, | |
| "loss": 2.6809, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.3884989176410914e-06, | |
| "loss": 2.6866, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.1167770159500756e-06, | |
| "loss": 2.6863, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.450551142590596e-07, | |
| "loss": 2.6912, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5.733332125680437e-07, | |
| "loss": 2.6916, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.0161131087702765e-07, | |
| "loss": 2.684, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 110607, | |
| "total_flos": 2.899312376933253e+20, | |
| "train_loss": 2.8584754099769967, | |
| "train_runtime": 318077.2613, | |
| "train_samples_per_second": 83.457, | |
| "train_steps_per_second": 0.348 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 110607, | |
| "num_train_epochs": 1, | |
| "save_steps": 11061, | |
| "total_flos": 2.899312376933253e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |