| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.009290658243136527, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 622.0, | |
| "completions/max_terminated_length": 622.0, | |
| "completions/mean_length": 551.0, | |
| "completions/mean_terminated_length": 551.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 9.290658243136526e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4252.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 3.0405590534210205, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 4.306971073150635, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 559.0, | |
| "completions/max_terminated_length": 559.0, | |
| "completions/mean_length": 538.5, | |
| "completions/mean_terminated_length": 538.5, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.00018581316486273051, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.875, | |
| "learning_rate": 4.95e-07, | |
| "loss": -0.0058, | |
| "num_tokens": 8340.0, | |
| "reward": -3.500124931335449, | |
| "reward_std": 1.414036750793457, | |
| "rewards/chatgpt_combined_reward/mean": -3.500124931335449, | |
| "rewards/chatgpt_combined_reward/std": 7.6809611320495605, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 730.0, | |
| "completions/max_terminated_length": 730.0, | |
| "completions/mean_length": 598.75, | |
| "completions/mean_terminated_length": 598.75, | |
| "completions/min_length": 523.0, | |
| "completions/min_terminated_length": 523.0, | |
| "epoch": 0.00027871974729409577, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.9e-07, | |
| "loss": -0.0225, | |
| "num_tokens": 12655.0, | |
| "reward": -1.5003752708435059, | |
| "reward_std": 1.414036750793457, | |
| "rewards/chatgpt_combined_reward/mean": -1.5003752708435059, | |
| "rewards/chatgpt_combined_reward/std": 9.949413299560547, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 648.0, | |
| "completions/mean_length": 803.5, | |
| "completions/mean_terminated_length": 583.0, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.00037162632972546103, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 4.85e-07, | |
| "loss": -0.0365, | |
| "num_tokens": 17917.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 674.0, | |
| "completions/max_terminated_length": 674.0, | |
| "completions/mean_length": 566.25, | |
| "completions/mean_terminated_length": 566.25, | |
| "completions/min_length": 516.0, | |
| "completions/min_terminated_length": 516.0, | |
| "epoch": 0.0004645329121568263, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 4.8e-07, | |
| "loss": -0.0466, | |
| "num_tokens": 21406.0, | |
| "reward": -1.7503750324249268, | |
| "reward_std": 2.4746968746185303, | |
| "rewards/chatgpt_combined_reward/mean": -1.7503750324249268, | |
| "rewards/chatgpt_combined_reward/std": 9.945212364196777, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 778.0, | |
| "completions/max_terminated_length": 778.0, | |
| "completions/mean_length": 599.0, | |
| "completions/mean_terminated_length": 599.0, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.0005574394945881915, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 4.7499999999999995e-07, | |
| "loss": -0.0, | |
| "num_tokens": 25850.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 600.0, | |
| "completions/max_terminated_length": 600.0, | |
| "completions/mean_length": 550.5, | |
| "completions/mean_terminated_length": 550.5, | |
| "completions/min_length": 527.0, | |
| "completions/min_terminated_length": 527.0, | |
| "epoch": 0.0006503460770195569, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 4.6999999999999995e-07, | |
| "loss": -0.009, | |
| "num_tokens": 29262.0, | |
| "reward": -2.9171252250671387, | |
| "reward_std": 4.124730587005615, | |
| "rewards/chatgpt_combined_reward/mean": -2.9171252250671387, | |
| "rewards/chatgpt_combined_reward/std": 9.46435546875, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 607.0, | |
| "completions/max_terminated_length": 607.0, | |
| "completions/mean_length": 544.25, | |
| "completions/mean_terminated_length": 544.25, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.0007432526594509221, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.65e-07, | |
| "loss": -0.0106, | |
| "num_tokens": 32691.0, | |
| "reward": -2.583625078201294, | |
| "reward_std": 1.296303391456604, | |
| "rewards/chatgpt_combined_reward/mean": -2.583625078201294, | |
| "rewards/chatgpt_combined_reward/std": 8.693524360656738, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 543.0, | |
| "completions/mean_length": 776.5, | |
| "completions/mean_terminated_length": 529.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.0008361592418822874, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.25, | |
| "learning_rate": 4.6e-07, | |
| "loss": 0.2254, | |
| "num_tokens": 37845.0, | |
| "reward": -6.249625205993652, | |
| "reward_std": 2.9463372230529785, | |
| "rewards/chatgpt_combined_reward/mean": -6.249625205993652, | |
| "rewards/chatgpt_combined_reward/std": 2.846529960632324, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 537.0, | |
| "completions/max_terminated_length": 537.0, | |
| "completions/mean_length": 528.75, | |
| "completions/mean_terminated_length": 528.75, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.0009290658243136526, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 4.55e-07, | |
| "loss": 0.0017, | |
| "num_tokens": 41190.0, | |
| "reward": 0.33299994468688965, | |
| "reward_std": 8.249107360839844, | |
| "rewards/chatgpt_combined_reward/mean": 0.33299994468688965, | |
| "rewards/chatgpt_combined_reward/std": 7.683218955993652, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 554.0, | |
| "completions/mean_length": 655.75, | |
| "completions/mean_terminated_length": 533.0, | |
| "completions/min_length": 516.0, | |
| "completions/min_terminated_length": 516.0, | |
| "epoch": 0.001021972406745018, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.875, | |
| "learning_rate": 4.5e-07, | |
| "loss": 0.1097, | |
| "num_tokens": 45861.0, | |
| "reward": -8.89987564086914, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -8.89987564086914, | |
| "rewards/chatgpt_combined_reward/std": 2.62703275680542, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 561.0, | |
| "completions/max_terminated_length": 561.0, | |
| "completions/mean_length": 529.5, | |
| "completions/mean_terminated_length": 529.5, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.001114878989176383, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.45e-07, | |
| "loss": -0.0048, | |
| "num_tokens": 49193.0, | |
| "reward": -2.2503747940063477, | |
| "reward_std": 2.003410577774048, | |
| "rewards/chatgpt_combined_reward/mean": -2.2503747940063477, | |
| "rewards/chatgpt_combined_reward/std": 9.242679595947266, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 591.0, | |
| "completions/max_terminated_length": 591.0, | |
| "completions/mean_length": 558.5, | |
| "completions/mean_terminated_length": 558.5, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.0012077855716077484, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "loss": 0.0078, | |
| "num_tokens": 53475.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1008.0, | |
| "completions/max_terminated_length": 1008.0, | |
| "completions/mean_length": 680.75, | |
| "completions/mean_terminated_length": 680.75, | |
| "completions/min_length": 523.0, | |
| "completions/min_terminated_length": 523.0, | |
| "epoch": 0.0013006921540391137, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 4.3499999999999996e-07, | |
| "loss": 0.0722, | |
| "num_tokens": 58246.0, | |
| "reward": -8.89987564086914, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -8.89987564086914, | |
| "rewards/chatgpt_combined_reward/std": 2.62703275680542, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 567.0, | |
| "completions/max_terminated_length": 567.0, | |
| "completions/mean_length": 533.5, | |
| "completions/mean_terminated_length": 533.5, | |
| "completions/min_length": 514.0, | |
| "completions/min_terminated_length": 514.0, | |
| "epoch": 0.001393598736470479, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 4.2999999999999996e-07, | |
| "loss": 0.0044, | |
| "num_tokens": 62196.0, | |
| "reward": -2.083625078201294, | |
| "reward_std": 0.5891967415809631, | |
| "rewards/chatgpt_combined_reward/mean": -2.083625078201294, | |
| "rewards/chatgpt_combined_reward/std": 9.166325569152832, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 729.0, | |
| "completions/mean_length": 842.0, | |
| "completions/mean_terminated_length": 660.0, | |
| "completions/min_length": 591.0, | |
| "completions/min_terminated_length": 591.0, | |
| "epoch": 0.0014865053189018441, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.75, | |
| "learning_rate": 4.2499999999999995e-07, | |
| "loss": -0.1542, | |
| "num_tokens": 67612.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 682.0, | |
| "completions/max_terminated_length": 682.0, | |
| "completions/mean_length": 560.0, | |
| "completions/mean_terminated_length": 560.0, | |
| "completions/min_length": 514.0, | |
| "completions/min_terminated_length": 514.0, | |
| "epoch": 0.0015794119013332094, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.375, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "loss": 0.0252, | |
| "num_tokens": 71066.0, | |
| "reward": -1.6670000553131104, | |
| "reward_std": 2.3567867279052734, | |
| "rewards/chatgpt_combined_reward/mean": -1.6670000553131104, | |
| "rewards/chatgpt_combined_reward/std": 9.999555587768555, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 699.0, | |
| "completions/max_terminated_length": 699.0, | |
| "completions/mean_length": 607.5, | |
| "completions/mean_terminated_length": 607.5, | |
| "completions/min_length": 521.0, | |
| "completions/min_terminated_length": 521.0, | |
| "epoch": 0.0016723184837645747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.125, | |
| "learning_rate": 4.1499999999999994e-07, | |
| "loss": -0.0983, | |
| "num_tokens": 75544.0, | |
| "reward": -8.89987564086914, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -8.89987564086914, | |
| "rewards/chatgpt_combined_reward/std": 2.2138428688049316, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 754.0, | |
| "completions/max_terminated_length": 754.0, | |
| "completions/mean_length": 596.75, | |
| "completions/mean_terminated_length": 596.75, | |
| "completions/min_length": 529.0, | |
| "completions/min_terminated_length": 529.0, | |
| "epoch": 0.00176522506619594, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 4.0999999999999994e-07, | |
| "loss": 0.0, | |
| "num_tokens": 79979.0, | |
| "reward": -8.48324966430664, | |
| "reward_std": 2.357140302658081, | |
| "rewards/chatgpt_combined_reward/mean": -8.48324966430664, | |
| "rewards/chatgpt_combined_reward/std": 3.0433735847473145, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 593.0, | |
| "completions/max_terminated_length": 593.0, | |
| "completions/mean_length": 545.25, | |
| "completions/mean_terminated_length": 545.25, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.0018581316486273051, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 4.05e-07, | |
| "loss": -0.0202, | |
| "num_tokens": 83912.0, | |
| "reward": -2.333625078201294, | |
| "reward_std": 1.414036750793457, | |
| "rewards/chatgpt_combined_reward/mean": -2.333625078201294, | |
| "rewards/chatgpt_combined_reward/std": 9.001688957214355, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 832.0, | |
| "completions/max_terminated_length": 832.0, | |
| "completions/mean_length": 612.0, | |
| "completions/mean_terminated_length": 612.0, | |
| "completions/min_length": 524.0, | |
| "completions/min_terminated_length": 524.0, | |
| "epoch": 0.0019510382310586705, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.5, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0457, | |
| "num_tokens": 88190.0, | |
| "reward": -2.1670000553131104, | |
| "reward_std": 0.47128671407699585, | |
| "rewards/chatgpt_combined_reward/mean": -2.1670000553131104, | |
| "rewards/chatgpt_combined_reward/std": 9.061125755310059, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 632.0, | |
| "completions/mean_length": 709.5, | |
| "completions/mean_terminated_length": 604.6666870117188, | |
| "completions/min_length": 582.0, | |
| "completions/min_terminated_length": 582.0, | |
| "epoch": 0.002043944813490036, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.75, | |
| "learning_rate": 3.95e-07, | |
| "loss": 0.089, | |
| "num_tokens": 93076.0, | |
| "reward": -6.816500663757324, | |
| "reward_std": 0.21213209629058838, | |
| "rewards/chatgpt_combined_reward/mean": -6.816500663757324, | |
| "rewards/chatgpt_combined_reward/std": 4.199178218841553, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 547.0, | |
| "completions/max_terminated_length": 547.0, | |
| "completions/mean_length": 533.0, | |
| "completions/mean_terminated_length": 533.0, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.002136851395921401, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 3.8999999999999997e-07, | |
| "loss": 0.0049, | |
| "num_tokens": 96414.0, | |
| "reward": -2.5003750324249268, | |
| "reward_std": 1.6498569250106812, | |
| "rewards/chatgpt_combined_reward/mean": -2.5003750324249268, | |
| "rewards/chatgpt_combined_reward/std": 8.866897583007812, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 992.0, | |
| "completions/max_terminated_length": 992.0, | |
| "completions/mean_length": 722.25, | |
| "completions/mean_terminated_length": 722.25, | |
| "completions/min_length": 521.0, | |
| "completions/min_terminated_length": 521.0, | |
| "epoch": 0.002229757978352766, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 3.8499999999999997e-07, | |
| "loss": 0.0319, | |
| "num_tokens": 100539.0, | |
| "reward": -2.083624839782715, | |
| "reward_std": 1.7675902843475342, | |
| "rewards/chatgpt_combined_reward/mean": -2.083624839782715, | |
| "rewards/chatgpt_combined_reward/std": 9.366135597229004, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 716.0, | |
| "completions/max_terminated_length": 716.0, | |
| "completions/mean_length": 616.5, | |
| "completions/mean_terminated_length": 616.5, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.0023226645607841317, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "loss": 0.0023, | |
| "num_tokens": 104789.0, | |
| "reward": -2.333750009536743, | |
| "reward_std": 0.942926824092865, | |
| "rewards/chatgpt_combined_reward/mean": -2.333750009536743, | |
| "rewards/chatgpt_combined_reward/std": 8.91893196105957, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 801.0, | |
| "completions/max_terminated_length": 801.0, | |
| "completions/mean_length": 624.0, | |
| "completions/mean_terminated_length": 624.0, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.002415571143215497, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 3.75e-07, | |
| "loss": -0.0, | |
| "num_tokens": 109333.0, | |
| "reward": -7.916375160217285, | |
| "reward_std": 2.9466910362243652, | |
| "rewards/chatgpt_combined_reward/mean": -7.916375160217285, | |
| "rewards/chatgpt_combined_reward/std": 2.500305652618408, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 602.0, | |
| "completions/max_terminated_length": 602.0, | |
| "completions/mean_length": 579.5, | |
| "completions/mean_terminated_length": 579.5, | |
| "completions/min_length": 539.0, | |
| "completions/min_terminated_length": 539.0, | |
| "epoch": 0.002508477725646862, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.21875, | |
| "learning_rate": 3.7e-07, | |
| "loss": 0.015, | |
| "num_tokens": 112875.0, | |
| "reward": 2.384185791015625e-07, | |
| "reward_std": 2.3567869663238525, | |
| "rewards/chatgpt_combined_reward/mean": 2.384185791015625e-07, | |
| "rewards/chatgpt_combined_reward/std": 7.070361137390137, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 695.0, | |
| "completions/mean_length": 694.0, | |
| "completions/mean_terminated_length": 584.0, | |
| "completions/min_length": 516.0, | |
| "completions/min_terminated_length": 516.0, | |
| "epoch": 0.0026013843080782274, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 3.65e-07, | |
| "loss": -0.076, | |
| "num_tokens": 117699.0, | |
| "reward": -5.416375160217285, | |
| "reward_std": 2.9463372230529785, | |
| "rewards/chatgpt_combined_reward/mean": -5.416375160217285, | |
| "rewards/chatgpt_combined_reward/std": 3.4360225200653076, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 571.0, | |
| "completions/mean_length": 910.75, | |
| "completions/mean_terminated_length": 571.0, | |
| "completions/min_length": 571.0, | |
| "completions/min_terminated_length": 571.0, | |
| "epoch": 0.0026942908905095925, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 3.6e-07, | |
| "loss": 0.1004, | |
| "num_tokens": 123390.0, | |
| "reward": -8.48324966430664, | |
| "reward_std": 2.357140302658081, | |
| "rewards/chatgpt_combined_reward/mean": -8.48324966430664, | |
| "rewards/chatgpt_combined_reward/std": 3.0433735847473145, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 636.0, | |
| "completions/mean_length": 700.75, | |
| "completions/mean_terminated_length": 593.0, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.002787197472940958, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.5, | |
| "learning_rate": 3.55e-07, | |
| "loss": 0.0609, | |
| "num_tokens": 128241.0, | |
| "reward": -7.499750137329102, | |
| "reward_std": 1.1783934831619263, | |
| "rewards/chatgpt_combined_reward/mean": -7.499750137329102, | |
| "rewards/chatgpt_combined_reward/std": 3.191626787185669, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 733.0, | |
| "completions/max_terminated_length": 733.0, | |
| "completions/mean_length": 613.0, | |
| "completions/mean_terminated_length": 613.0, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.002880104055372323, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 3.5e-07, | |
| "loss": 0.0078, | |
| "num_tokens": 132735.0, | |
| "reward": -2.5002501010894775, | |
| "reward_std": 1.1783934831619263, | |
| "rewards/chatgpt_combined_reward/mean": -2.5002501010894775, | |
| "rewards/chatgpt_combined_reward/std": 8.766212463378906, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 995.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 712.0, | |
| "completions/mean_terminated_length": 712.0, | |
| "completions/min_length": 534.0, | |
| "completions/min_terminated_length": 534.0, | |
| "epoch": 0.0029730106378036882, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.46875, | |
| "learning_rate": 3.45e-07, | |
| "loss": 0.0859, | |
| "num_tokens": 137571.0, | |
| "reward": -1.7503750324249268, | |
| "reward_std": 2.4746968746185303, | |
| "rewards/chatgpt_combined_reward/mean": -1.7503750324249268, | |
| "rewards/chatgpt_combined_reward/std": 9.945212364196777, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 570.0, | |
| "completions/max_terminated_length": 570.0, | |
| "completions/mean_length": 531.0, | |
| "completions/mean_terminated_length": 531.0, | |
| "completions/min_length": 513.0, | |
| "completions/min_terminated_length": 513.0, | |
| "epoch": 0.0030659172202350538, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "loss": 0.0149, | |
| "num_tokens": 141743.0, | |
| "reward": -9.316499710083008, | |
| "reward_std": 1.1787471771240234, | |
| "rewards/chatgpt_combined_reward/mean": -9.316499710083008, | |
| "rewards/chatgpt_combined_reward/std": 1.3887726068496704, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 721.0, | |
| "completions/mean_length": 750.75, | |
| "completions/mean_terminated_length": 659.6666870117188, | |
| "completions/min_length": 605.0, | |
| "completions/min_terminated_length": 605.0, | |
| "epoch": 0.003158823802666419, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 3.35e-07, | |
| "loss": -0.0175, | |
| "num_tokens": 146734.0, | |
| "reward": -2.083624839782715, | |
| "reward_std": 1.7675902843475342, | |
| "rewards/chatgpt_combined_reward/mean": -2.083624839782715, | |
| "rewards/chatgpt_combined_reward/std": 9.366135597229004, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 643.0, | |
| "completions/max_terminated_length": 643.0, | |
| "completions/mean_length": 552.5, | |
| "completions/mean_terminated_length": 552.5, | |
| "completions/min_length": 514.0, | |
| "completions/min_terminated_length": 514.0, | |
| "epoch": 0.0032517303850977844, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 3.3e-07, | |
| "loss": -0.0381, | |
| "num_tokens": 150992.0, | |
| "reward": -6.666500091552734, | |
| "reward_std": 1.1783933639526367, | |
| "rewards/chatgpt_combined_reward/mean": -6.666500091552734, | |
| "rewards/chatgpt_combined_reward/std": 4.0826191902160645, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 738.0, | |
| "completions/max_terminated_length": 738.0, | |
| "completions/mean_length": 577.25, | |
| "completions/mean_terminated_length": 577.25, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.0033446369675291495, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 3.25e-07, | |
| "loss": -0.059, | |
| "num_tokens": 155349.0, | |
| "reward": -6.249875068664551, | |
| "reward_std": 5.3034772872924805, | |
| "rewards/chatgpt_combined_reward/mean": -6.249875068664551, | |
| "rewards/chatgpt_combined_reward/std": 4.383391380310059, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 801.0, | |
| "completions/max_terminated_length": 801.0, | |
| "completions/mean_length": 589.5, | |
| "completions/mean_terminated_length": 589.5, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.0034375435499605146, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.125, | |
| "learning_rate": 3.2e-07, | |
| "loss": -0.0, | |
| "num_tokens": 159755.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 713.0, | |
| "completions/max_terminated_length": 713.0, | |
| "completions/mean_length": 602.5, | |
| "completions/mean_terminated_length": 602.5, | |
| "completions/min_length": 548.0, | |
| "completions/min_terminated_length": 548.0, | |
| "epoch": 0.00353045013239188, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.25, | |
| "learning_rate": 3.15e-07, | |
| "loss": -0.0297, | |
| "num_tokens": 164213.0, | |
| "reward": -8.066625595092773, | |
| "reward_std": 2.9463369846343994, | |
| "rewards/chatgpt_combined_reward/mean": -8.066625595092773, | |
| "rewards/chatgpt_combined_reward/std": 4.283124923706055, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 810.0, | |
| "completions/max_terminated_length": 810.0, | |
| "completions/mean_length": 696.5, | |
| "completions/mean_terminated_length": 696.5, | |
| "completions/min_length": 536.0, | |
| "completions/min_terminated_length": 536.0, | |
| "epoch": 0.003623356714823245, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 6.875, | |
| "learning_rate": 3.1e-07, | |
| "loss": -0.0857, | |
| "num_tokens": 169047.0, | |
| "reward": -8.483250617980957, | |
| "reward_std": 2.357140302658081, | |
| "rewards/chatgpt_combined_reward/mean": -8.483250617980957, | |
| "rewards/chatgpt_combined_reward/std": 3.453827142715454, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 575.0, | |
| "completions/mean_length": 666.0, | |
| "completions/mean_terminated_length": 546.6666870117188, | |
| "completions/min_length": 530.0, | |
| "completions/min_terminated_length": 530.0, | |
| "epoch": 0.0037162632972546103, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.25, | |
| "learning_rate": 3.05e-07, | |
| "loss": -0.0, | |
| "num_tokens": 172953.0, | |
| "reward": -0.25024986267089844, | |
| "reward_std": 3.889087200164795, | |
| "rewards/chatgpt_combined_reward/mean": -0.25024986267089844, | |
| "rewards/chatgpt_combined_reward/std": 8.088777542114258, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 575.0, | |
| "completions/max_terminated_length": 575.0, | |
| "completions/mean_length": 537.25, | |
| "completions/mean_terminated_length": 537.25, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.003809169879685976, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3e-07, | |
| "loss": -0.0072, | |
| "num_tokens": 176344.0, | |
| "reward": -3.666874885559082, | |
| "reward_std": 0.9427501559257507, | |
| "rewards/chatgpt_combined_reward/mean": -3.666874885559082, | |
| "rewards/chatgpt_combined_reward/std": 7.393443584442139, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 670.0, | |
| "completions/max_terminated_length": 670.0, | |
| "completions/mean_length": 577.0, | |
| "completions/mean_terminated_length": 577.0, | |
| "completions/min_length": 538.0, | |
| "completions/min_terminated_length": 538.0, | |
| "epoch": 0.003902076462117341, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.0, | |
| "learning_rate": 2.95e-07, | |
| "loss": -0.0, | |
| "num_tokens": 180700.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 741.0, | |
| "completions/max_terminated_length": 741.0, | |
| "completions/mean_length": 606.0, | |
| "completions/mean_terminated_length": 606.0, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.003994983044548706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.625, | |
| "learning_rate": 2.9e-07, | |
| "loss": 0.033, | |
| "num_tokens": 185098.0, | |
| "reward": -1.9167499542236328, | |
| "reward_std": 5.06748104095459, | |
| "rewards/chatgpt_combined_reward/mean": -1.9167499542236328, | |
| "rewards/chatgpt_combined_reward/std": 5.698338031768799, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 575.0, | |
| "completions/max_terminated_length": 575.0, | |
| "completions/mean_length": 560.0, | |
| "completions/mean_terminated_length": 560.0, | |
| "completions/min_length": 544.0, | |
| "completions/min_terminated_length": 544.0, | |
| "epoch": 0.004087889626980072, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 2.8499999999999997e-07, | |
| "loss": 0.0152, | |
| "num_tokens": 188550.0, | |
| "reward": -1.6668751239776611, | |
| "reward_std": 1.8856770992279053, | |
| "rewards/chatgpt_combined_reward/mean": -1.6668751239776611, | |
| "rewards/chatgpt_combined_reward/std": 7.039121150970459, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 573.0, | |
| "completions/mean_length": 785.0, | |
| "completions/mean_terminated_length": 546.0, | |
| "completions/min_length": 519.0, | |
| "completions/min_terminated_length": 519.0, | |
| "epoch": 0.004180796209411437, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.75, | |
| "learning_rate": 2.8e-07, | |
| "loss": -0.0, | |
| "num_tokens": 193738.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 534.0, | |
| "completions/mean_length": 651.5, | |
| "completions/mean_terminated_length": 527.3333740234375, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.004273702791842802, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 2.75e-07, | |
| "loss": 0.1173, | |
| "num_tokens": 198392.0, | |
| "reward": -8.066625595092773, | |
| "reward_std": 3.1584692001342773, | |
| "rewards/chatgpt_combined_reward/mean": -8.066625595092773, | |
| "rewards/chatgpt_combined_reward/std": 4.083926677703857, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 761.0, | |
| "completions/max_terminated_length": 761.0, | |
| "completions/mean_length": 585.5, | |
| "completions/mean_terminated_length": 585.5, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.004366609374274168, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 2.7e-07, | |
| "loss": 0.0291, | |
| "num_tokens": 201948.0, | |
| "reward": -1.333749771118164, | |
| "reward_std": 0.7071069478988647, | |
| "rewards/chatgpt_combined_reward/mean": -1.333749771118164, | |
| "rewards/chatgpt_combined_reward/std": 10.040179252624512, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 619.0, | |
| "completions/max_terminated_length": 619.0, | |
| "completions/mean_length": 579.0, | |
| "completions/mean_terminated_length": 579.0, | |
| "completions/min_length": 526.0, | |
| "completions/min_terminated_length": 526.0, | |
| "epoch": 0.004459515956705532, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.25, | |
| "learning_rate": 2.65e-07, | |
| "loss": -0.0357, | |
| "num_tokens": 205824.0, | |
| "reward": 2.082624673843384, | |
| "reward_std": 2.2388768196105957, | |
| "rewards/chatgpt_combined_reward/mean": 2.082624673843384, | |
| "rewards/chatgpt_combined_reward/std": 5.983651638031006, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 664.0, | |
| "completions/max_terminated_length": 664.0, | |
| "completions/mean_length": 567.25, | |
| "completions/mean_terminated_length": 567.25, | |
| "completions/min_length": 524.0, | |
| "completions/min_terminated_length": 524.0, | |
| "epoch": 0.004552422539136898, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.125, | |
| "learning_rate": 2.6e-07, | |
| "loss": 0.0394, | |
| "num_tokens": 210141.0, | |
| "reward": -9.316500663757324, | |
| "reward_std": 1.3908790349960327, | |
| "rewards/chatgpt_combined_reward/mean": -9.316500663757324, | |
| "rewards/chatgpt_combined_reward/std": 1.982193112373352, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 739.0, | |
| "completions/max_terminated_length": 739.0, | |
| "completions/mean_length": 640.0, | |
| "completions/mean_terminated_length": 640.0, | |
| "completions/min_length": 538.0, | |
| "completions/min_terminated_length": 538.0, | |
| "epoch": 0.004645329121568263, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.75, | |
| "learning_rate": 2.55e-07, | |
| "loss": 0.0381, | |
| "num_tokens": 213923.0, | |
| "reward": -4.583374977111816, | |
| "reward_std": 2.945983409881592, | |
| "rewards/chatgpt_combined_reward/mean": -4.583374977111816, | |
| "rewards/chatgpt_combined_reward/std": 7.119798183441162, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 559.0, | |
| "completions/mean_length": 661.0, | |
| "completions/mean_terminated_length": 540.0, | |
| "completions/min_length": 530.0, | |
| "completions/min_terminated_length": 530.0, | |
| "epoch": 0.004738235703999628, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.125, | |
| "learning_rate": 2.5e-07, | |
| "loss": -0.0, | |
| "num_tokens": 218615.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 829.0, | |
| "completions/max_terminated_length": 829.0, | |
| "completions/mean_length": 595.25, | |
| "completions/mean_terminated_length": 595.25, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.004831142286430994, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.65625, | |
| "learning_rate": 2.45e-07, | |
| "loss": 0.0813, | |
| "num_tokens": 222836.0, | |
| "reward": -3.750124931335449, | |
| "reward_std": 4.124377250671387, | |
| "rewards/chatgpt_combined_reward/mean": -3.750124931335449, | |
| "rewards/chatgpt_combined_reward/std": 8.646496772766113, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1000.0, | |
| "completions/max_terminated_length": 1000.0, | |
| "completions/mean_length": 675.5, | |
| "completions/mean_terminated_length": 675.5, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.004924048868862359, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.53125, | |
| "learning_rate": 2.4e-07, | |
| "loss": -0.0, | |
| "num_tokens": 227586.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 697.0, | |
| "completions/max_terminated_length": 697.0, | |
| "completions/mean_length": 603.25, | |
| "completions/mean_terminated_length": 603.25, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.005016955451293724, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.25, | |
| "learning_rate": 2.3499999999999997e-07, | |
| "loss": 0.0037, | |
| "num_tokens": 231885.0, | |
| "reward": -0.25025010108947754, | |
| "reward_std": 5.067480087280273, | |
| "rewards/chatgpt_combined_reward/mean": -0.25025010108947754, | |
| "rewards/chatgpt_combined_reward/std": 8.490804672241211, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 601.0, | |
| "completions/mean_length": 667.75, | |
| "completions/mean_terminated_length": 549.0, | |
| "completions/min_length": 521.0, | |
| "completions/min_terminated_length": 521.0, | |
| "epoch": 0.005109862033725089, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 2.3e-07, | |
| "loss": -0.0456, | |
| "num_tokens": 236320.0, | |
| "reward": -2.833625078201294, | |
| "reward_std": 0.7069300413131714, | |
| "rewards/chatgpt_combined_reward/mean": -2.833625078201294, | |
| "rewards/chatgpt_combined_reward/std": 8.315181732177734, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 650.0, | |
| "completions/max_terminated_length": 650.0, | |
| "completions/mean_length": 571.25, | |
| "completions/mean_terminated_length": 571.25, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.005202768616156455, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 2.25e-07, | |
| "loss": 0.0162, | |
| "num_tokens": 239589.0, | |
| "reward": 7.665875434875488, | |
| "reward_std": 0.9427504539489746, | |
| "rewards/chatgpt_combined_reward/mean": 7.665875434875488, | |
| "rewards/chatgpt_combined_reward/std": 0.8164288401603699, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 585.0, | |
| "completions/max_terminated_length": 585.0, | |
| "completions/mean_length": 555.75, | |
| "completions/mean_terminated_length": 555.75, | |
| "completions/min_length": 524.0, | |
| "completions/min_terminated_length": 524.0, | |
| "epoch": 0.00529567519858782, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "loss": 0.0021, | |
| "num_tokens": 243860.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 724.0, | |
| "completions/max_terminated_length": 724.0, | |
| "completions/mean_length": 616.0, | |
| "completions/mean_terminated_length": 616.0, | |
| "completions/min_length": 562.0, | |
| "completions/min_terminated_length": 562.0, | |
| "epoch": 0.005388581781019185, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.53125, | |
| "learning_rate": 2.1499999999999998e-07, | |
| "loss": 0.0464, | |
| "num_tokens": 248372.0, | |
| "reward": -7.499750137329102, | |
| "reward_std": 3.5358872413635254, | |
| "rewards/chatgpt_combined_reward/mean": -7.499750137329102, | |
| "rewards/chatgpt_combined_reward/std": 3.191626787185669, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 835.0, | |
| "completions/mean_length": 726.5, | |
| "completions/mean_terminated_length": 627.3333740234375, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.0054814883634505505, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.875, | |
| "learning_rate": 2.0999999999999997e-07, | |
| "loss": 0.0292, | |
| "num_tokens": 253326.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 665.0, | |
| "completions/max_terminated_length": 665.0, | |
| "completions/mean_length": 558.0, | |
| "completions/mean_terminated_length": 558.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.005574394945881916, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 10.375, | |
| "learning_rate": 2.0499999999999997e-07, | |
| "loss": 0.0376, | |
| "num_tokens": 257606.0, | |
| "reward": -7.083125114440918, | |
| "reward_std": 4.125083923339844, | |
| "rewards/chatgpt_combined_reward/mean": -7.083125114440918, | |
| "rewards/chatgpt_combined_reward/std": 3.436143636703491, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 692.0, | |
| "completions/max_terminated_length": 692.0, | |
| "completions/mean_length": 613.25, | |
| "completions/mean_terminated_length": 613.25, | |
| "completions/min_length": 514.0, | |
| "completions/min_terminated_length": 514.0, | |
| "epoch": 0.005667301528313281, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.75, | |
| "learning_rate": 2e-07, | |
| "loss": -0.0, | |
| "num_tokens": 262107.0, | |
| "reward": -5.833000183105469, | |
| "reward_std": 3.535533905029297, | |
| "rewards/chatgpt_combined_reward/mean": -5.833000183105469, | |
| "rewards/chatgpt_combined_reward/std": 3.469496726989746, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 572.0, | |
| "completions/mean_length": 672.0, | |
| "completions/mean_terminated_length": 554.6666870117188, | |
| "completions/min_length": 537.0, | |
| "completions/min_terminated_length": 537.0, | |
| "epoch": 0.005760208110744646, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.9499999999999999e-07, | |
| "loss": 0.1156, | |
| "num_tokens": 266843.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 884.0, | |
| "completions/max_terminated_length": 884.0, | |
| "completions/mean_length": 666.5, | |
| "completions/mean_terminated_length": 666.5, | |
| "completions/min_length": 543.0, | |
| "completions/min_terminated_length": 543.0, | |
| "epoch": 0.005853114693176012, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 1.8999999999999998e-07, | |
| "loss": 0.0609, | |
| "num_tokens": 271557.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 683.0, | |
| "completions/mean_terminated_length": 569.3333740234375, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.0059460212756073764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 1.85e-07, | |
| "loss": -0.0, | |
| "num_tokens": 276127.0, | |
| "reward": -0.0002503395080566406, | |
| "reward_std": 3.535533905029297, | |
| "rewards/chatgpt_combined_reward/mean": -0.0002503395080566406, | |
| "rewards/chatgpt_combined_reward/std": 9.128161430358887, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 990.0, | |
| "completions/mean_length": 838.0, | |
| "completions/mean_terminated_length": 776.0, | |
| "completions/min_length": 549.0, | |
| "completions/min_terminated_length": 549.0, | |
| "epoch": 0.006038927858038742, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 1.8e-07, | |
| "loss": 0.0668, | |
| "num_tokens": 281527.0, | |
| "reward": -9.316499710083008, | |
| "reward_std": 1.3908792734146118, | |
| "rewards/chatgpt_combined_reward/mean": -9.316499710083008, | |
| "rewards/chatgpt_combined_reward/std": 1.61105215549469, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 803.0, | |
| "completions/mean_length": 881.75, | |
| "completions/mean_terminated_length": 739.5, | |
| "completions/min_length": 676.0, | |
| "completions/min_terminated_length": 676.0, | |
| "epoch": 0.0061318344404701075, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.875, | |
| "learning_rate": 1.75e-07, | |
| "loss": 0.1151, | |
| "num_tokens": 287102.0, | |
| "reward": -8.89987564086914, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -8.89987564086914, | |
| "rewards/chatgpt_combined_reward/std": 2.2138428688049316, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 665.0, | |
| "completions/max_terminated_length": 665.0, | |
| "completions/mean_length": 606.5, | |
| "completions/mean_terminated_length": 606.5, | |
| "completions/min_length": 550.0, | |
| "completions/min_terminated_length": 550.0, | |
| "epoch": 0.006224741022901472, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 1.7000000000000001e-07, | |
| "loss": 0.0032, | |
| "num_tokens": 291296.0, | |
| "reward": -2.333750009536743, | |
| "reward_std": 1.4142135381698608, | |
| "rewards/chatgpt_combined_reward/mean": -2.333750009536743, | |
| "rewards/chatgpt_combined_reward/std": 9.001585006713867, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 658.0, | |
| "completions/max_terminated_length": 658.0, | |
| "completions/mean_length": 580.5, | |
| "completions/mean_terminated_length": 580.5, | |
| "completions/min_length": 544.0, | |
| "completions/min_terminated_length": 544.0, | |
| "epoch": 0.006317647605332838, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.625, | |
| "learning_rate": 1.65e-07, | |
| "loss": 0.026, | |
| "num_tokens": 295666.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 621.0, | |
| "completions/mean_length": 673.0, | |
| "completions/mean_terminated_length": 556.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.006410554187764203, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.09375, | |
| "learning_rate": 1.6e-07, | |
| "loss": 0.1442, | |
| "num_tokens": 300406.0, | |
| "reward": -7.499750137329102, | |
| "reward_std": 3.5358872413635254, | |
| "rewards/chatgpt_combined_reward/mean": -7.499750137329102, | |
| "rewards/chatgpt_combined_reward/std": 3.191626787185669, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 692.0, | |
| "completions/max_terminated_length": 692.0, | |
| "completions/mean_length": 593.25, | |
| "completions/mean_terminated_length": 593.25, | |
| "completions/min_length": 520.0, | |
| "completions/min_terminated_length": 520.0, | |
| "epoch": 0.006503460770195569, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.53125, | |
| "learning_rate": 1.55e-07, | |
| "loss": 0.031, | |
| "num_tokens": 304811.0, | |
| "reward": -0.41687512397766113, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -0.41687512397766113, | |
| "rewards/chatgpt_combined_reward/std": 8.42953109741211, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 681.0, | |
| "completions/max_terminated_length": 681.0, | |
| "completions/mean_length": 581.25, | |
| "completions/mean_terminated_length": 581.25, | |
| "completions/min_length": 514.0, | |
| "completions/min_terminated_length": 514.0, | |
| "epoch": 0.006596367352626933, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.375, | |
| "learning_rate": 1.5e-07, | |
| "loss": 0.0708, | |
| "num_tokens": 308360.0, | |
| "reward": -2.0002501010894775, | |
| "reward_std": 5.42103385925293, | |
| "rewards/chatgpt_combined_reward/mean": -2.0002501010894775, | |
| "rewards/chatgpt_combined_reward/std": 6.996899127960205, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 852.0, | |
| "completions/max_terminated_length": 852.0, | |
| "completions/mean_length": 611.75, | |
| "completions/mean_terminated_length": 611.75, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.006689273935058299, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 1.45e-07, | |
| "loss": 0.0753, | |
| "num_tokens": 312855.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 683.0, | |
| "completions/max_terminated_length": 683.0, | |
| "completions/mean_length": 599.75, | |
| "completions/mean_terminated_length": 599.75, | |
| "completions/min_length": 556.0, | |
| "completions/min_terminated_length": 556.0, | |
| "epoch": 0.0067821805174896645, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.4e-07, | |
| "loss": -0.0233, | |
| "num_tokens": 317276.0, | |
| "reward": -1.2503752708435059, | |
| "reward_std": 1.7675901651382446, | |
| "rewards/chatgpt_combined_reward/mean": -1.2503752708435059, | |
| "rewards/chatgpt_combined_reward/std": 10.30729866027832, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 685.0, | |
| "completions/mean_length": 707.5, | |
| "completions/mean_terminated_length": 602.0, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.006875087099921029, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 3.984375, | |
| "learning_rate": 1.35e-07, | |
| "loss": 0.0693, | |
| "num_tokens": 322154.0, | |
| "reward": -7.499750137329102, | |
| "reward_std": 1.1783934831619263, | |
| "rewards/chatgpt_combined_reward/mean": -7.499750137329102, | |
| "rewards/chatgpt_combined_reward/std": 3.191626787185669, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 723.0, | |
| "completions/max_terminated_length": 723.0, | |
| "completions/mean_length": 623.0, | |
| "completions/mean_terminated_length": 623.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.006967993682352395, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.3e-07, | |
| "loss": 0.0071, | |
| "num_tokens": 325852.0, | |
| "reward": -2.5002501010894775, | |
| "reward_std": 1.1783934831619263, | |
| "rewards/chatgpt_combined_reward/mean": -2.5002501010894775, | |
| "rewards/chatgpt_combined_reward/std": 8.766212463378906, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 588.0, | |
| "completions/max_terminated_length": 588.0, | |
| "completions/mean_length": 543.0, | |
| "completions/mean_terminated_length": 543.0, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.00706090026478376, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.875, | |
| "learning_rate": 1.25e-07, | |
| "loss": -0.0118, | |
| "num_tokens": 330072.0, | |
| "reward": -4.583125114440918, | |
| "reward_std": 1.7679438591003418, | |
| "rewards/chatgpt_combined_reward/mean": -4.583125114440918, | |
| "rewards/chatgpt_combined_reward/std": 4.589576721191406, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 540.0, | |
| "completions/mean_length": 656.0, | |
| "completions/mean_terminated_length": 533.3333740234375, | |
| "completions/min_length": 527.0, | |
| "completions/min_terminated_length": 527.0, | |
| "epoch": 0.007153806847215125, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.5, | |
| "learning_rate": 1.2e-07, | |
| "loss": 0.1072, | |
| "num_tokens": 334744.0, | |
| "reward": -7.90000057220459, | |
| "reward_std": 3.3941125869750977, | |
| "rewards/chatgpt_combined_reward/mean": -7.90000057220459, | |
| "rewards/chatgpt_combined_reward/std": 4.806246280670166, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 814.0, | |
| "completions/max_terminated_length": 814.0, | |
| "completions/mean_length": 618.25, | |
| "completions/mean_terminated_length": 618.25, | |
| "completions/min_length": 519.0, | |
| "completions/min_terminated_length": 519.0, | |
| "epoch": 0.00724671342964649, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.4375, | |
| "learning_rate": 1.15e-07, | |
| "loss": -0.0338, | |
| "num_tokens": 339217.0, | |
| "reward": -2.0003750324249268, | |
| "reward_std": 0.4714634120464325, | |
| "rewards/chatgpt_combined_reward/mean": -2.0003750324249268, | |
| "rewards/chatgpt_combined_reward/std": 9.253199577331543, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 664.0, | |
| "completions/max_terminated_length": 664.0, | |
| "completions/mean_length": 561.75, | |
| "completions/mean_terminated_length": 561.75, | |
| "completions/min_length": 520.0, | |
| "completions/min_terminated_length": 520.0, | |
| "epoch": 0.007339620012077856, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 1.0999999999999999e-07, | |
| "loss": -0.0368, | |
| "num_tokens": 342692.0, | |
| "reward": -1.2501251697540283, | |
| "reward_std": 2.9463372230529785, | |
| "rewards/chatgpt_combined_reward/mean": -1.2501251697540283, | |
| "rewards/chatgpt_combined_reward/std": 7.622077465057373, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 818.0, | |
| "completions/max_terminated_length": 818.0, | |
| "completions/mean_length": 644.25, | |
| "completions/mean_terminated_length": 644.25, | |
| "completions/min_length": 532.0, | |
| "completions/min_terminated_length": 532.0, | |
| "epoch": 0.0074325265945092206, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 1.0499999999999999e-07, | |
| "loss": -0.1178, | |
| "num_tokens": 346483.0, | |
| "reward": -1.5832499265670776, | |
| "reward_std": 2.23905348777771, | |
| "rewards/chatgpt_combined_reward/mean": -1.5832499265670776, | |
| "rewards/chatgpt_combined_reward/std": 5.307532787322998, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 518.0, | |
| "completions/mean_length": 770.5, | |
| "completions/mean_terminated_length": 517.0, | |
| "completions/min_length": 516.0, | |
| "completions/min_terminated_length": 516.0, | |
| "epoch": 0.007525433176940586, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 1e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 351613.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 636.0, | |
| "completions/max_terminated_length": 636.0, | |
| "completions/mean_length": 553.5, | |
| "completions/mean_terminated_length": 553.5, | |
| "completions/min_length": 525.0, | |
| "completions/min_terminated_length": 525.0, | |
| "epoch": 0.007618339759371952, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.625, | |
| "learning_rate": 9.499999999999999e-08, | |
| "loss": 0.0164, | |
| "num_tokens": 355029.0, | |
| "reward": -2.7502501010894775, | |
| "reward_std": 3.8887336254119873, | |
| "rewards/chatgpt_combined_reward/mean": -2.7502501010894775, | |
| "rewards/chatgpt_combined_reward/std": 9.499552726745605, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 626.0, | |
| "completions/max_terminated_length": 626.0, | |
| "completions/mean_length": 580.75, | |
| "completions/mean_terminated_length": 580.75, | |
| "completions/min_length": 532.0, | |
| "completions/min_terminated_length": 532.0, | |
| "epoch": 0.007711246341803317, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 9e-08, | |
| "loss": 0.0014, | |
| "num_tokens": 358566.0, | |
| "reward": -0.2505002021789551, | |
| "reward_std": 0.3535533845424652, | |
| "rewards/chatgpt_combined_reward/mean": -0.2505002021789551, | |
| "rewards/chatgpt_combined_reward/std": 11.265152931213379, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 693.0, | |
| "completions/mean_length": 705.5, | |
| "completions/mean_terminated_length": 599.3333740234375, | |
| "completions/min_length": 515.0, | |
| "completions/min_terminated_length": 515.0, | |
| "epoch": 0.007804152924234682, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 8.500000000000001e-08, | |
| "loss": -0.1453, | |
| "num_tokens": 363436.0, | |
| "reward": -3.749875068664551, | |
| "reward_std": 4.124730587005615, | |
| "rewards/chatgpt_combined_reward/mean": -3.749875068664551, | |
| "rewards/chatgpt_combined_reward/std": 4.383296489715576, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 890.0, | |
| "completions/max_terminated_length": 890.0, | |
| "completions/mean_length": 618.75, | |
| "completions/mean_terminated_length": 618.75, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.007897059506666047, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.125, | |
| "learning_rate": 8e-08, | |
| "loss": -0.086, | |
| "num_tokens": 367959.0, | |
| "reward": -8.89987564086914, | |
| "reward_std": 1.9800759553909302, | |
| "rewards/chatgpt_combined_reward/mean": -8.89987564086914, | |
| "rewards/chatgpt_combined_reward/std": 2.4292385578155518, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 519.0, | |
| "completions/mean_length": 644.25, | |
| "completions/mean_terminated_length": 517.6666870117188, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.007989966089097413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.0625, | |
| "learning_rate": 7.5e-08, | |
| "loss": 0.1156, | |
| "num_tokens": 370960.0, | |
| "reward": 3.832624912261963, | |
| "reward_std": 3.771176815032959, | |
| "rewards/chatgpt_combined_reward/mean": 3.832624912261963, | |
| "rewards/chatgpt_combined_reward/std": 4.7957444190979, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 731.0, | |
| "completions/max_terminated_length": 731.0, | |
| "completions/mean_length": 585.75, | |
| "completions/mean_terminated_length": 585.75, | |
| "completions/min_length": 520.0, | |
| "completions/min_terminated_length": 520.0, | |
| "epoch": 0.008082872671528778, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 7e-08, | |
| "loss": 0.0622, | |
| "num_tokens": 375351.0, | |
| "reward": -5.983250141143799, | |
| "reward_std": 5.892674446105957, | |
| "rewards/chatgpt_combined_reward/mean": -5.983250141143799, | |
| "rewards/chatgpt_combined_reward/std": 4.826911449432373, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1002.0, | |
| "completions/max_terminated_length": 1002.0, | |
| "completions/mean_length": 724.25, | |
| "completions/mean_terminated_length": 724.25, | |
| "completions/min_length": 562.0, | |
| "completions/min_terminated_length": 562.0, | |
| "epoch": 0.008175779253960144, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.625, | |
| "learning_rate": 6.5e-08, | |
| "loss": -0.0, | |
| "num_tokens": 380296.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 746.0, | |
| "completions/max_terminated_length": 746.0, | |
| "completions/mean_length": 592.25, | |
| "completions/mean_terminated_length": 592.25, | |
| "completions/min_length": 517.0, | |
| "completions/min_terminated_length": 517.0, | |
| "epoch": 0.008268685836391508, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.75, | |
| "learning_rate": 6e-08, | |
| "loss": -0.0, | |
| "num_tokens": 384329.0, | |
| "reward": 4.832625389099121, | |
| "reward_std": 3.771177291870117, | |
| "rewards/chatgpt_combined_reward/mean": 4.832625389099121, | |
| "rewards/chatgpt_combined_reward/std": 3.911078929901123, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 698.0, | |
| "completions/max_terminated_length": 698.0, | |
| "completions/mean_length": 633.0, | |
| "completions/mean_terminated_length": 633.0, | |
| "completions/min_length": 598.0, | |
| "completions/min_terminated_length": 598.0, | |
| "epoch": 0.008361592418822873, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 5.4999999999999996e-08, | |
| "loss": -0.0157, | |
| "num_tokens": 388909.0, | |
| "reward": -5.983250141143799, | |
| "reward_std": 0.21213209629058838, | |
| "rewards/chatgpt_combined_reward/mean": -5.983250141143799, | |
| "rewards/chatgpt_combined_reward/std": 4.468296527862549, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 750.0, | |
| "completions/max_terminated_length": 750.0, | |
| "completions/mean_length": 584.5, | |
| "completions/mean_terminated_length": 584.5, | |
| "completions/min_length": 523.0, | |
| "completions/min_terminated_length": 523.0, | |
| "epoch": 0.008454499001254239, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 5e-08, | |
| "loss": 0.0284, | |
| "num_tokens": 392953.0, | |
| "reward": -2.083624839782715, | |
| "reward_std": 1.7675902843475342, | |
| "rewards/chatgpt_combined_reward/mean": -2.083624839782715, | |
| "rewards/chatgpt_combined_reward/std": 9.366135597229004, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 686.0, | |
| "completions/mean_length": 714.25, | |
| "completions/mean_terminated_length": 611.0, | |
| "completions/min_length": 522.0, | |
| "completions/min_terminated_length": 522.0, | |
| "epoch": 0.008547405583685604, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.0, | |
| "learning_rate": 4.5e-08, | |
| "loss": 0.1335, | |
| "num_tokens": 397858.0, | |
| "reward": -9.316499710083008, | |
| "reward_std": 1.3908792734146118, | |
| "rewards/chatgpt_combined_reward/mean": -9.316499710083008, | |
| "rewards/chatgpt_combined_reward/std": 1.61105215549469, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 848.0, | |
| "completions/max_terminated_length": 848.0, | |
| "completions/mean_length": 737.75, | |
| "completions/mean_terminated_length": 737.75, | |
| "completions/min_length": 639.0, | |
| "completions/min_terminated_length": 639.0, | |
| "epoch": 0.00864031216611697, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 4e-08, | |
| "loss": -0.0, | |
| "num_tokens": 402857.0, | |
| "reward": -8.15000057220459, | |
| "reward_std": 0.21213217079639435, | |
| "rewards/chatgpt_combined_reward/mean": -8.15000057220459, | |
| "rewards/chatgpt_combined_reward/std": 0.387298583984375, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 958.0, | |
| "completions/max_terminated_length": 958.0, | |
| "completions/mean_length": 649.0, | |
| "completions/mean_terminated_length": 649.0, | |
| "completions/min_length": 526.0, | |
| "completions/min_terminated_length": 526.0, | |
| "epoch": 0.008733218748548335, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 9.5, | |
| "learning_rate": 3.5e-08, | |
| "loss": 0.0, | |
| "num_tokens": 407501.0, | |
| "reward": -9.316499710083008, | |
| "reward_std": 1.1787470579147339, | |
| "rewards/chatgpt_combined_reward/mean": -9.316499710083008, | |
| "rewards/chatgpt_combined_reward/std": 1.8061809539794922, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 795.0, | |
| "completions/max_terminated_length": 795.0, | |
| "completions/mean_length": 645.0, | |
| "completions/mean_terminated_length": 645.0, | |
| "completions/min_length": 530.0, | |
| "completions/min_terminated_length": 530.0, | |
| "epoch": 0.0088261253309797, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 8.75, | |
| "learning_rate": 3e-08, | |
| "loss": -0.0538, | |
| "num_tokens": 412129.0, | |
| "reward": -2.583124876022339, | |
| "reward_std": 1.2963035106658936, | |
| "rewards/chatgpt_combined_reward/mean": -2.583124876022339, | |
| "rewards/chatgpt_combined_reward/std": 2.114532947540283, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 806.0, | |
| "completions/mean_length": 732.25, | |
| "completions/mean_terminated_length": 635.0, | |
| "completions/min_length": 525.0, | |
| "completions/min_terminated_length": 525.0, | |
| "epoch": 0.008919031913411065, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.0, | |
| "learning_rate": 2.5e-08, | |
| "loss": 0.0132, | |
| "num_tokens": 416252.0, | |
| "reward": -3.1670000553131104, | |
| "reward_std": 0.2354665994644165, | |
| "rewards/chatgpt_combined_reward/mean": -3.1670000553131104, | |
| "rewards/chatgpt_combined_reward/std": 7.894752502441406, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 772.0, | |
| "completions/max_terminated_length": 772.0, | |
| "completions/mean_length": 650.0, | |
| "completions/mean_terminated_length": 650.0, | |
| "completions/min_length": 518.0, | |
| "completions/min_terminated_length": 518.0, | |
| "epoch": 0.00901193849584243, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 4.4375, | |
| "learning_rate": 2e-08, | |
| "loss": -0.0286, | |
| "num_tokens": 420900.0, | |
| "reward": -5.916625022888184, | |
| "reward_std": 0.11791006475687027, | |
| "rewards/chatgpt_combined_reward/mean": -5.916625022888184, | |
| "rewards/chatgpt_combined_reward/std": 4.717040538787842, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 652.0, | |
| "completions/mean_length": 683.25, | |
| "completions/mean_terminated_length": 569.6666870117188, | |
| "completions/min_length": 516.0, | |
| "completions/min_terminated_length": 516.0, | |
| "epoch": 0.009104845078273796, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 1.5e-08, | |
| "loss": 0.0434, | |
| "num_tokens": 425291.0, | |
| "reward": -0.8337502479553223, | |
| "reward_std": 1.1783933639526367, | |
| "rewards/chatgpt_combined_reward/mean": -0.8337502479553223, | |
| "rewards/chatgpt_combined_reward/std": 10.671379089355469, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 622.0, | |
| "completions/max_terminated_length": 622.0, | |
| "completions/mean_length": 573.5, | |
| "completions/mean_terminated_length": 573.5, | |
| "completions/min_length": 519.0, | |
| "completions/min_terminated_length": 519.0, | |
| "epoch": 0.009197751660705161, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 1e-08, | |
| "loss": -0.0229, | |
| "num_tokens": 429633.0, | |
| "reward": -7.749750137329102, | |
| "reward_std": 0.8248399496078491, | |
| "rewards/chatgpt_combined_reward/mean": -7.749750137329102, | |
| "rewards/chatgpt_combined_reward/std": 2.7674262523651123, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 757.0, | |
| "completions/max_terminated_length": 757.0, | |
| "completions/mean_length": 595.25, | |
| "completions/mean_terminated_length": 595.25, | |
| "completions/min_length": 531.0, | |
| "completions/min_terminated_length": 531.0, | |
| "epoch": 0.009290658243136527, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 5.25, | |
| "learning_rate": 5e-09, | |
| "loss": 0.0081, | |
| "num_tokens": 433854.0, | |
| "reward": -2.0003747940063477, | |
| "reward_std": 1.6498571634292603, | |
| "rewards/chatgpt_combined_reward/mean": -2.0003747940063477, | |
| "rewards/chatgpt_combined_reward/std": 9.431580543518066, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 100, | |
| "num_input_tokens_seen": 433854, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |