| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 72.99270072992701, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.875e-05, | |
| "loss": 10.1798, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_test_accuracy": 0.0, | |
| "eval_test_loss": 3.4965455532073975, | |
| "eval_test_runtime": 7.5045, | |
| "eval_test_samples_per_second": 162.702, | |
| "eval_test_steps_per_second": 2.665, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 4.75e-05, | |
| "loss": 4.0133, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "eval_test_accuracy": 0.085995085995086, | |
| "eval_test_loss": 2.1827144622802734, | |
| "eval_test_runtime": 11.227, | |
| "eval_test_samples_per_second": 108.755, | |
| "eval_test_steps_per_second": 1.781, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 4.6250000000000006e-05, | |
| "loss": 2.1831, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "eval_test_accuracy": 0.28992628992628994, | |
| "eval_test_loss": 1.1154128313064575, | |
| "eval_test_runtime": 4.9154, | |
| "eval_test_samples_per_second": 248.402, | |
| "eval_test_steps_per_second": 4.069, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 4.5e-05, | |
| "loss": 1.178, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "eval_test_accuracy": 0.4430794430794431, | |
| "eval_test_loss": 0.7581946849822998, | |
| "eval_test_runtime": 5.2157, | |
| "eval_test_samples_per_second": 234.1, | |
| "eval_test_steps_per_second": 3.835, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.8603, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "eval_test_accuracy": 0.506961506961507, | |
| "eval_test_loss": 0.6857301592826843, | |
| "eval_test_runtime": 4.1605, | |
| "eval_test_samples_per_second": 293.474, | |
| "eval_test_steps_per_second": 4.807, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.7179, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "eval_test_accuracy": 0.5356265356265356, | |
| "eval_test_loss": 0.629558801651001, | |
| "eval_test_runtime": 4.1543, | |
| "eval_test_samples_per_second": 293.909, | |
| "eval_test_steps_per_second": 4.814, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "learning_rate": 4.125e-05, | |
| "loss": 0.6347, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "eval_test_accuracy": 0.556920556920557, | |
| "eval_test_loss": 0.6828880310058594, | |
| "eval_test_runtime": 4.1527, | |
| "eval_test_samples_per_second": 294.026, | |
| "eval_test_steps_per_second": 4.816, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 14.6, | |
| "learning_rate": 4e-05, | |
| "loss": 0.5714, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 14.6, | |
| "eval_test_accuracy": 0.5683865683865684, | |
| "eval_test_loss": 0.6402557492256165, | |
| "eval_test_runtime": 4.1126, | |
| "eval_test_samples_per_second": 296.89, | |
| "eval_test_steps_per_second": 4.863, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 16.42, | |
| "learning_rate": 3.875e-05, | |
| "loss": 0.535, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 16.42, | |
| "eval_test_accuracy": 0.5823095823095823, | |
| "eval_test_loss": 0.6427932381629944, | |
| "eval_test_runtime": 4.1425, | |
| "eval_test_samples_per_second": 294.751, | |
| "eval_test_steps_per_second": 4.828, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4864, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "eval_test_accuracy": 0.5749385749385749, | |
| "eval_test_loss": 0.6692995429039001, | |
| "eval_test_runtime": 4.1218, | |
| "eval_test_samples_per_second": 296.233, | |
| "eval_test_steps_per_second": 4.852, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.07, | |
| "learning_rate": 3.625e-05, | |
| "loss": 0.4523, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 20.07, | |
| "eval_test_accuracy": 0.588042588042588, | |
| "eval_test_loss": 0.6854296326637268, | |
| "eval_test_runtime": 4.1256, | |
| "eval_test_samples_per_second": 295.954, | |
| "eval_test_steps_per_second": 4.848, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 21.9, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.4267, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 21.9, | |
| "eval_test_accuracy": 0.5847665847665847, | |
| "eval_test_loss": 0.6832742691040039, | |
| "eval_test_runtime": 4.114, | |
| "eval_test_samples_per_second": 296.79, | |
| "eval_test_steps_per_second": 4.861, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 23.72, | |
| "learning_rate": 3.375000000000001e-05, | |
| "loss": 0.4017, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 23.72, | |
| "eval_test_accuracy": 0.5864045864045864, | |
| "eval_test_loss": 0.7026733756065369, | |
| "eval_test_runtime": 4.162, | |
| "eval_test_samples_per_second": 293.366, | |
| "eval_test_steps_per_second": 4.805, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 25.55, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.3737, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 25.55, | |
| "eval_test_accuracy": 0.5823095823095823, | |
| "eval_test_loss": 0.7358095645904541, | |
| "eval_test_runtime": 4.1139, | |
| "eval_test_samples_per_second": 296.797, | |
| "eval_test_steps_per_second": 4.862, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 27.37, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.3567, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 27.37, | |
| "eval_test_accuracy": 0.583947583947584, | |
| "eval_test_loss": 0.7573221921920776, | |
| "eval_test_runtime": 4.1462, | |
| "eval_test_samples_per_second": 294.489, | |
| "eval_test_steps_per_second": 4.824, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "learning_rate": 3e-05, | |
| "loss": 0.3329, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "eval_test_accuracy": 0.5831285831285832, | |
| "eval_test_loss": 0.7671645283699036, | |
| "eval_test_runtime": 4.1876, | |
| "eval_test_samples_per_second": 291.577, | |
| "eval_test_steps_per_second": 4.776, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 31.02, | |
| "learning_rate": 2.8749999999999997e-05, | |
| "loss": 0.3178, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 31.02, | |
| "eval_test_accuracy": 0.5937755937755937, | |
| "eval_test_loss": 0.8280954360961914, | |
| "eval_test_runtime": 4.1401, | |
| "eval_test_samples_per_second": 294.919, | |
| "eval_test_steps_per_second": 4.831, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 32.85, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3031, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 32.85, | |
| "eval_test_accuracy": 0.5954135954135954, | |
| "eval_test_loss": 0.8298905491828918, | |
| "eval_test_runtime": 4.1172, | |
| "eval_test_samples_per_second": 296.557, | |
| "eval_test_steps_per_second": 4.858, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "learning_rate": 2.625e-05, | |
| "loss": 0.2942, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "eval_test_accuracy": 0.592956592956593, | |
| "eval_test_loss": 0.8406508564949036, | |
| "eval_test_runtime": 4.1428, | |
| "eval_test_samples_per_second": 294.726, | |
| "eval_test_steps_per_second": 4.828, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.2794, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "eval_test_accuracy": 0.6003276003276004, | |
| "eval_test_loss": 0.8442530035972595, | |
| "eval_test_runtime": 4.3235, | |
| "eval_test_samples_per_second": 282.409, | |
| "eval_test_steps_per_second": 4.626, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 38.32, | |
| "learning_rate": 2.375e-05, | |
| "loss": 0.2733, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 38.32, | |
| "eval_test_accuracy": 0.6052416052416052, | |
| "eval_test_loss": 0.8638033270835876, | |
| "eval_test_runtime": 4.1266, | |
| "eval_test_samples_per_second": 295.887, | |
| "eval_test_steps_per_second": 4.847, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 40.15, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.2631, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 40.15, | |
| "eval_test_accuracy": 0.5888615888615889, | |
| "eval_test_loss": 0.890779435634613, | |
| "eval_test_runtime": 4.1284, | |
| "eval_test_samples_per_second": 295.759, | |
| "eval_test_steps_per_second": 4.845, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 41.97, | |
| "learning_rate": 2.125e-05, | |
| "loss": 0.2574, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 41.97, | |
| "eval_test_accuracy": 0.588042588042588, | |
| "eval_test_loss": 0.9194920063018799, | |
| "eval_test_runtime": 4.2329, | |
| "eval_test_samples_per_second": 288.451, | |
| "eval_test_steps_per_second": 4.725, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 43.8, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2445, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 43.8, | |
| "eval_test_accuracy": 0.5913185913185913, | |
| "eval_test_loss": 0.9236257672309875, | |
| "eval_test_runtime": 4.1684, | |
| "eval_test_samples_per_second": 292.916, | |
| "eval_test_steps_per_second": 4.798, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.2417, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "eval_test_accuracy": 0.5913185913185913, | |
| "eval_test_loss": 0.9303093552589417, | |
| "eval_test_runtime": 4.1896, | |
| "eval_test_samples_per_second": 291.435, | |
| "eval_test_steps_per_second": 4.774, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 47.45, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.2316, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 47.45, | |
| "eval_test_accuracy": 0.6060606060606061, | |
| "eval_test_loss": 0.9456475377082825, | |
| "eval_test_runtime": 4.1609, | |
| "eval_test_samples_per_second": 293.446, | |
| "eval_test_steps_per_second": 4.807, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 49.27, | |
| "learning_rate": 1.6250000000000002e-05, | |
| "loss": 0.227, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 49.27, | |
| "eval_test_accuracy": 0.5978705978705978, | |
| "eval_test_loss": 0.9745798110961914, | |
| "eval_test_runtime": 4.1394, | |
| "eval_test_samples_per_second": 294.971, | |
| "eval_test_steps_per_second": 4.832, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 51.09, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2241, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 51.09, | |
| "eval_test_accuracy": 0.6052416052416052, | |
| "eval_test_loss": 0.938654899597168, | |
| "eval_test_runtime": 4.1652, | |
| "eval_test_samples_per_second": 293.143, | |
| "eval_test_steps_per_second": 4.802, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 52.92, | |
| "learning_rate": 1.3750000000000002e-05, | |
| "loss": 0.2174, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 52.92, | |
| "eval_test_accuracy": 0.5986895986895987, | |
| "eval_test_loss": 0.9762380719184875, | |
| "eval_test_runtime": 4.2021, | |
| "eval_test_samples_per_second": 290.57, | |
| "eval_test_steps_per_second": 4.76, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 54.74, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.212, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 54.74, | |
| "eval_test_accuracy": 0.601965601965602, | |
| "eval_test_loss": 0.9834132194519043, | |
| "eval_test_runtime": 4.1906, | |
| "eval_test_samples_per_second": 291.369, | |
| "eval_test_steps_per_second": 4.773, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 56.57, | |
| "learning_rate": 1.125e-05, | |
| "loss": 0.206, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 56.57, | |
| "eval_test_accuracy": 0.5995085995085995, | |
| "eval_test_loss": 0.9860948920249939, | |
| "eval_test_runtime": 4.1715, | |
| "eval_test_samples_per_second": 292.702, | |
| "eval_test_steps_per_second": 4.794, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 58.39, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2057, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 58.39, | |
| "eval_test_accuracy": 0.5962325962325963, | |
| "eval_test_loss": 1.0094884634017944, | |
| "eval_test_runtime": 4.2216, | |
| "eval_test_samples_per_second": 289.23, | |
| "eval_test_steps_per_second": 4.738, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 60.22, | |
| "learning_rate": 8.75e-06, | |
| "loss": 0.2023, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 60.22, | |
| "eval_test_accuracy": 0.597051597051597, | |
| "eval_test_loss": 1.000124216079712, | |
| "eval_test_runtime": 4.1702, | |
| "eval_test_samples_per_second": 292.793, | |
| "eval_test_steps_per_second": 4.796, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 62.04, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.1994, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 62.04, | |
| "eval_test_accuracy": 0.5995085995085995, | |
| "eval_test_loss": 1.0179657936096191, | |
| "eval_test_runtime": 4.1982, | |
| "eval_test_samples_per_second": 290.842, | |
| "eval_test_steps_per_second": 4.764, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 63.87, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.1967, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 63.87, | |
| "eval_test_accuracy": 0.6044226044226044, | |
| "eval_test_loss": 1.0143113136291504, | |
| "eval_test_runtime": 4.1544, | |
| "eval_test_samples_per_second": 293.907, | |
| "eval_test_steps_per_second": 4.814, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 65.69, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1915, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 65.69, | |
| "eval_test_accuracy": 0.6011466011466011, | |
| "eval_test_loss": 1.0377224683761597, | |
| "eval_test_runtime": 4.1791, | |
| "eval_test_samples_per_second": 292.165, | |
| "eval_test_steps_per_second": 4.786, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 67.52, | |
| "learning_rate": 3.75e-06, | |
| "loss": 0.1934, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 67.52, | |
| "eval_test_accuracy": 0.601965601965602, | |
| "eval_test_loss": 1.02960205078125, | |
| "eval_test_runtime": 4.2049, | |
| "eval_test_samples_per_second": 290.372, | |
| "eval_test_steps_per_second": 4.756, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 69.34, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.1932, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 69.34, | |
| "eval_test_accuracy": 0.601965601965602, | |
| "eval_test_loss": 1.0294890403747559, | |
| "eval_test_runtime": 4.1796, | |
| "eval_test_samples_per_second": 292.135, | |
| "eval_test_steps_per_second": 4.785, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 71.17, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.1898, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 71.17, | |
| "eval_test_accuracy": 0.6011466011466011, | |
| "eval_test_loss": 1.0313055515289307, | |
| "eval_test_runtime": 4.1318, | |
| "eval_test_samples_per_second": 295.51, | |
| "eval_test_steps_per_second": 4.84, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 72.99, | |
| "learning_rate": 0.0, | |
| "loss": 0.1916, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 72.99, | |
| "eval_test_accuracy": 0.6011466011466011, | |
| "eval_test_loss": 1.0304898023605347, | |
| "eval_test_runtime": 4.1756, | |
| "eval_test_samples_per_second": 292.415, | |
| "eval_test_steps_per_second": 4.79, | |
| "step": 10000 | |
| } | |
| ], | |
| "max_steps": 10000, | |
| "num_train_epochs": 73, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |