| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.99968, | |
| "eval_steps": 300, | |
| "global_step": 781, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 51.5, | |
| "learning_rate": 6.329113924050633e-09, | |
| "logits/generated": 0.1862616091966629, | |
| "logits/real": -0.24196800589561462, | |
| "logps/generated": -538.626953125, | |
| "logps/real": -411.1020202636719, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/generated": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/real": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 56.0, | |
| "learning_rate": 3.1645569620253166e-08, | |
| "logits/generated": 0.348865807056427, | |
| "logits/real": -0.1247345358133316, | |
| "logps/generated": -534.8592529296875, | |
| "logps/real": -401.95263671875, | |
| "loss": 0.7116, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/generated": 0.018089786171913147, | |
| "rewards/margins": -0.02859283983707428, | |
| "rewards/real": -0.010503048077225685, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 52.25, | |
| "learning_rate": 6.329113924050633e-08, | |
| "logits/generated": 0.37161511182785034, | |
| "logits/real": -0.12119238078594208, | |
| "logps/generated": -555.2481689453125, | |
| "logps/real": -418.7288513183594, | |
| "loss": 0.707, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/generated": -0.016772981733083725, | |
| "rewards/margins": -0.006616026163101196, | |
| "rewards/real": -0.02338900789618492, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 50.75, | |
| "learning_rate": 9.49367088607595e-08, | |
| "logits/generated": 0.3857704699039459, | |
| "logits/real": -0.10262864828109741, | |
| "logps/generated": -564.0440673828125, | |
| "logps/real": -434.94561767578125, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/generated": -0.02160583809018135, | |
| "rewards/margins": 0.07228925079107285, | |
| "rewards/real": 0.05068342760205269, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 52.5, | |
| "learning_rate": 1.2658227848101266e-07, | |
| "logits/generated": 0.3662206530570984, | |
| "logits/real": -0.06378354132175446, | |
| "logps/generated": -558.4364013671875, | |
| "logps/real": -439.30816650390625, | |
| "loss": 0.7284, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/generated": 0.002506026765331626, | |
| "rewards/margins": -0.03106103278696537, | |
| "rewards/real": -0.028555002063512802, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 53.25, | |
| "learning_rate": 1.5822784810126582e-07, | |
| "logits/generated": 0.39896726608276367, | |
| "logits/real": -0.12075452506542206, | |
| "logps/generated": -565.0833740234375, | |
| "logps/real": -439.34344482421875, | |
| "loss": 0.708, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/generated": 0.013953039422631264, | |
| "rewards/margins": -0.00905968714505434, | |
| "rewards/real": 0.004893349949270487, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 54.75, | |
| "learning_rate": 1.89873417721519e-07, | |
| "logits/generated": 0.36376339197158813, | |
| "logits/real": -0.15110069513320923, | |
| "logps/generated": -563.3927612304688, | |
| "logps/real": -444.236083984375, | |
| "loss": 0.6823, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/generated": -0.03245529532432556, | |
| "rewards/margins": 0.041769616305828094, | |
| "rewards/real": 0.009314320981502533, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 53.0, | |
| "learning_rate": 2.2151898734177212e-07, | |
| "logits/generated": 0.3546258807182312, | |
| "logits/real": -0.08531169593334198, | |
| "logps/generated": -567.1099853515625, | |
| "logps/real": -426.37530517578125, | |
| "loss": 0.6792, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/generated": -0.04005850851535797, | |
| "rewards/margins": 0.062456123530864716, | |
| "rewards/real": 0.022397616878151894, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 54.0, | |
| "learning_rate": 2.5316455696202533e-07, | |
| "logits/generated": 0.3955411911010742, | |
| "logits/real": -0.09925423562526703, | |
| "logps/generated": -584.2412719726562, | |
| "logps/real": -481.74151611328125, | |
| "loss": 0.7088, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/generated": 0.002044306369498372, | |
| "rewards/margins": 0.014470143243670464, | |
| "rewards/real": 0.016514450311660767, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 49.75, | |
| "learning_rate": 2.848101265822785e-07, | |
| "logits/generated": 0.3811967372894287, | |
| "logits/real": -0.10687746107578278, | |
| "logps/generated": -589.9839477539062, | |
| "logps/real": -475.3309020996094, | |
| "loss": 0.6826, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/generated": -0.01990634948015213, | |
| "rewards/margins": 0.01696391962468624, | |
| "rewards/real": -0.0029424328822642565, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 48.0, | |
| "learning_rate": 3.1645569620253163e-07, | |
| "logits/generated": 0.36268311738967896, | |
| "logits/real": -0.13508598506450653, | |
| "logps/generated": -559.43505859375, | |
| "logps/real": -434.68145751953125, | |
| "loss": 0.6677, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/generated": -0.050643421709537506, | |
| "rewards/margins": 0.07995937764644623, | |
| "rewards/real": 0.029315968975424767, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 54.25, | |
| "learning_rate": 3.481012658227848e-07, | |
| "logits/generated": 0.3685925602912903, | |
| "logits/real": -0.11418122053146362, | |
| "logps/generated": -591.776611328125, | |
| "logps/real": -450.0094299316406, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/generated": -0.06491654366254807, | |
| "rewards/margins": 0.06822647154331207, | |
| "rewards/real": 0.0033099246211349964, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 52.0, | |
| "learning_rate": 3.79746835443038e-07, | |
| "logits/generated": 0.39532652497291565, | |
| "logits/real": -0.08878536522388458, | |
| "logps/generated": -570.0611572265625, | |
| "logps/real": -456.876708984375, | |
| "loss": 0.6667, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/generated": -0.08224920183420181, | |
| "rewards/margins": 0.05731937289237976, | |
| "rewards/real": -0.02492983639240265, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 48.0, | |
| "learning_rate": 4.1139240506329117e-07, | |
| "logits/generated": 0.3644171953201294, | |
| "logits/real": -0.11488844454288483, | |
| "logps/generated": -542.10498046875, | |
| "logps/real": -441.2933044433594, | |
| "loss": 0.6619, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/generated": -0.07368786633014679, | |
| "rewards/margins": 0.10115363448858261, | |
| "rewards/real": 0.02746577560901642, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 48.25, | |
| "learning_rate": 4.4303797468354424e-07, | |
| "logits/generated": 0.3911452889442444, | |
| "logits/real": -0.15676851570606232, | |
| "logps/generated": -570.861083984375, | |
| "logps/real": -487.00848388671875, | |
| "loss": 0.6486, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/generated": -0.1312028169631958, | |
| "rewards/margins": 0.1361098289489746, | |
| "rewards/real": 0.004907005000859499, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 51.25, | |
| "learning_rate": 4.746835443037974e-07, | |
| "logits/generated": 0.38353046774864197, | |
| "logits/real": -0.09031276404857635, | |
| "logps/generated": -571.59765625, | |
| "logps/real": -419.74285888671875, | |
| "loss": 0.6689, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/generated": -0.09446991980075836, | |
| "rewards/margins": 0.06767363846302032, | |
| "rewards/real": -0.026796285063028336, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 51.25, | |
| "learning_rate": 4.992877492877492e-07, | |
| "logits/generated": 0.3525523245334625, | |
| "logits/real": -0.14858296513557434, | |
| "logps/generated": -547.0704345703125, | |
| "logps/real": -442.40228271484375, | |
| "loss": 0.638, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/generated": -0.11326056718826294, | |
| "rewards/margins": 0.12944021821022034, | |
| "rewards/real": 0.016179624944925308, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 48.0, | |
| "learning_rate": 4.957264957264958e-07, | |
| "logits/generated": 0.360097736120224, | |
| "logits/real": -0.18511034548282623, | |
| "logps/generated": -535.1893310546875, | |
| "logps/real": -456.3688049316406, | |
| "loss": 0.611, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/generated": -0.18863049149513245, | |
| "rewards/margins": 0.2453247308731079, | |
| "rewards/real": 0.05669426918029785, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 51.0, | |
| "learning_rate": 4.921652421652421e-07, | |
| "logits/generated": 0.3094131052494049, | |
| "logits/real": -0.13698112964630127, | |
| "logps/generated": -578.9557495117188, | |
| "logps/real": -435.10626220703125, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/generated": -0.16466441750526428, | |
| "rewards/margins": 0.2127438485622406, | |
| "rewards/real": 0.04807942733168602, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 49.5, | |
| "learning_rate": 4.886039886039886e-07, | |
| "logits/generated": 0.3409915864467621, | |
| "logits/real": -0.15773525834083557, | |
| "logps/generated": -562.3658447265625, | |
| "logps/real": -472.12261962890625, | |
| "loss": 0.5971, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/generated": -0.2226313352584839, | |
| "rewards/margins": 0.24577657878398895, | |
| "rewards/real": 0.023145252838730812, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 46.5, | |
| "learning_rate": 4.850427350427351e-07, | |
| "logits/generated": 0.3600820302963257, | |
| "logits/real": -0.22068698704242706, | |
| "logps/generated": -518.3560791015625, | |
| "logps/real": -505.414794921875, | |
| "loss": 0.5968, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/generated": -0.22140724956989288, | |
| "rewards/margins": 0.2667246460914612, | |
| "rewards/real": 0.045317377895116806, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 47.0, | |
| "learning_rate": 4.814814814814814e-07, | |
| "logits/generated": 0.3369310796260834, | |
| "logits/real": -0.16511984169483185, | |
| "logps/generated": -597.7069091796875, | |
| "logps/real": -469.93951416015625, | |
| "loss": 0.5653, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/generated": -0.24688784778118134, | |
| "rewards/margins": 0.31749141216278076, | |
| "rewards/real": 0.07060358673334122, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 49.75, | |
| "learning_rate": 4.779202279202279e-07, | |
| "logits/generated": 0.31372249126434326, | |
| "logits/real": -0.13822853565216064, | |
| "logps/generated": -559.5579223632812, | |
| "logps/real": -436.1619567871094, | |
| "loss": 0.5843, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/generated": -0.24767298996448517, | |
| "rewards/margins": 0.2614055275917053, | |
| "rewards/real": 0.013732570223510265, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 44.75, | |
| "learning_rate": 4.743589743589743e-07, | |
| "logits/generated": 0.3527405858039856, | |
| "logits/real": -0.16191214323043823, | |
| "logps/generated": -540.7269287109375, | |
| "logps/real": -481.5582580566406, | |
| "loss": 0.5474, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/generated": -0.3221510648727417, | |
| "rewards/margins": 0.33944302797317505, | |
| "rewards/real": 0.017291929572820663, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 44.0, | |
| "learning_rate": 4.707977207977208e-07, | |
| "logits/generated": 0.31554654240608215, | |
| "logits/real": -0.14937585592269897, | |
| "logps/generated": -533.3251953125, | |
| "logps/real": -425.991455078125, | |
| "loss": 0.5488, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/generated": -0.350756973028183, | |
| "rewards/margins": 0.37362727522850037, | |
| "rewards/real": 0.022870313376188278, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 43.25, | |
| "learning_rate": 4.672364672364672e-07, | |
| "logits/generated": 0.31335240602493286, | |
| "logits/real": -0.16108378767967224, | |
| "logps/generated": -556.67919921875, | |
| "logps/real": -451.10296630859375, | |
| "loss": 0.532, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/generated": -0.3505321741104126, | |
| "rewards/margins": 0.4209725260734558, | |
| "rewards/real": 0.07044035941362381, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 45.75, | |
| "learning_rate": 4.6367521367521367e-07, | |
| "logits/generated": 0.32702523469924927, | |
| "logits/real": -0.16390064358711243, | |
| "logps/generated": -558.4036254882812, | |
| "logps/real": -435.8067932128906, | |
| "loss": 0.5325, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/generated": -0.3638128936290741, | |
| "rewards/margins": 0.40913742780685425, | |
| "rewards/real": 0.045324526727199554, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 44.0, | |
| "learning_rate": 4.601139601139601e-07, | |
| "logits/generated": 0.28192949295043945, | |
| "logits/real": -0.23023930191993713, | |
| "logps/generated": -578.7821655273438, | |
| "logps/real": -447.467529296875, | |
| "loss": 0.532, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/generated": -0.37364333868026733, | |
| "rewards/margins": 0.4050399363040924, | |
| "rewards/real": 0.03139660134911537, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 45.25, | |
| "learning_rate": 4.5655270655270654e-07, | |
| "logits/generated": 0.3297092914581299, | |
| "logits/real": -0.24259880185127258, | |
| "logps/generated": -557.1586303710938, | |
| "logps/real": -533.3529663085938, | |
| "loss": 0.5193, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/generated": -0.3680626153945923, | |
| "rewards/margins": 0.42688456177711487, | |
| "rewards/real": 0.058821968734264374, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 41.25, | |
| "learning_rate": 4.5299145299145297e-07, | |
| "logits/generated": 0.2855086922645569, | |
| "logits/real": -0.16449978947639465, | |
| "logps/generated": -531.9002685546875, | |
| "logps/real": -427.7098693847656, | |
| "loss": 0.5324, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/generated": -0.4032559394836426, | |
| "rewards/margins": 0.4310643672943115, | |
| "rewards/real": 0.027808407321572304, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 47.25, | |
| "learning_rate": 4.494301994301994e-07, | |
| "logits/generated": 0.26499122381210327, | |
| "logits/real": -0.17654789984226227, | |
| "logps/generated": -544.5510864257812, | |
| "logps/real": -443.2236328125, | |
| "loss": 0.5146, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/generated": -0.46017971634864807, | |
| "rewards/margins": 0.47648102045059204, | |
| "rewards/real": 0.016301285475492477, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 43.75, | |
| "learning_rate": 4.4586894586894584e-07, | |
| "logits/generated": 0.32447490096092224, | |
| "logits/real": -0.19060441851615906, | |
| "logps/generated": -565.9913330078125, | |
| "logps/real": -485.25762939453125, | |
| "loss": 0.5035, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/generated": -0.44145339727401733, | |
| "rewards/margins": 0.4792710244655609, | |
| "rewards/real": 0.03781764581799507, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 43.75, | |
| "learning_rate": 4.423076923076923e-07, | |
| "logits/generated": 0.3035694360733032, | |
| "logits/real": -0.22318044304847717, | |
| "logps/generated": -566.83251953125, | |
| "logps/real": -444.7403259277344, | |
| "loss": 0.5005, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/generated": -0.4472742974758148, | |
| "rewards/margins": 0.5138620138168335, | |
| "rewards/real": 0.06658776849508286, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 41.25, | |
| "learning_rate": 4.3874643874643876e-07, | |
| "logits/generated": 0.3123244643211365, | |
| "logits/real": -0.1916072815656662, | |
| "logps/generated": -558.3717041015625, | |
| "logps/real": -460.4476623535156, | |
| "loss": 0.4883, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/generated": -0.4848392605781555, | |
| "rewards/margins": 0.4970267713069916, | |
| "rewards/real": 0.012187558226287365, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 41.25, | |
| "learning_rate": 4.3518518518518514e-07, | |
| "logits/generated": 0.250892698764801, | |
| "logits/real": -0.18243929743766785, | |
| "logps/generated": -536.6226196289062, | |
| "logps/real": -416.77984619140625, | |
| "loss": 0.496, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/generated": -0.4702886939048767, | |
| "rewards/margins": 0.5208121538162231, | |
| "rewards/real": 0.05052344128489494, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 41.0, | |
| "learning_rate": 4.3162393162393163e-07, | |
| "logits/generated": 0.29632076621055603, | |
| "logits/real": -0.2431728094816208, | |
| "logps/generated": -538.0128173828125, | |
| "logps/real": -478.095703125, | |
| "loss": 0.4654, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/generated": -0.5014994144439697, | |
| "rewards/margins": 0.5601893663406372, | |
| "rewards/real": 0.0586898997426033, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 44.5, | |
| "learning_rate": 4.2806267806267807e-07, | |
| "logits/generated": 0.277912437915802, | |
| "logits/real": -0.2574039101600647, | |
| "logps/generated": -561.8548583984375, | |
| "logps/real": -444.5439453125, | |
| "loss": 0.4828, | |
| "rewards/accuracies": 0.875, | |
| "rewards/generated": -0.5042586922645569, | |
| "rewards/margins": 0.5849383473396301, | |
| "rewards/real": 0.08067961782217026, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 43.25, | |
| "learning_rate": 4.245014245014245e-07, | |
| "logits/generated": 0.33910220861434937, | |
| "logits/real": -0.11496976763010025, | |
| "logps/generated": -599.3956298828125, | |
| "logps/real": -421.74334716796875, | |
| "loss": 0.4619, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/generated": -0.572574257850647, | |
| "rewards/margins": 0.6046732664108276, | |
| "rewards/real": 0.032099105417728424, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 43.5, | |
| "learning_rate": 4.2094017094017093e-07, | |
| "logits/generated": 0.32159385085105896, | |
| "logits/real": -0.17387528717517853, | |
| "logps/generated": -586.5929565429688, | |
| "logps/real": -426.36444091796875, | |
| "loss": 0.483, | |
| "rewards/accuracies": 0.875, | |
| "rewards/generated": -0.5183109641075134, | |
| "rewards/margins": 0.5664752721786499, | |
| "rewards/real": 0.0481642410159111, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 39.5, | |
| "learning_rate": 4.173789173789173e-07, | |
| "logits/generated": 0.26057395339012146, | |
| "logits/real": -0.21449565887451172, | |
| "logps/generated": -527.934326171875, | |
| "logps/real": -452.62567138671875, | |
| "loss": 0.465, | |
| "rewards/accuracies": 0.893750011920929, | |
| "rewards/generated": -0.53789222240448, | |
| "rewards/margins": 0.5843125581741333, | |
| "rewards/real": 0.04642019420862198, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 39.5, | |
| "learning_rate": 4.138176638176638e-07, | |
| "logits/generated": 0.3313707411289215, | |
| "logits/real": -0.19944889843463898, | |
| "logps/generated": -580.3385620117188, | |
| "logps/real": -472.051513671875, | |
| "loss": 0.4567, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/generated": -0.5696550607681274, | |
| "rewards/margins": 0.5835729837417603, | |
| "rewards/real": 0.013917927630245686, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 41.0, | |
| "learning_rate": 4.1025641025641024e-07, | |
| "logits/generated": 0.3330136835575104, | |
| "logits/real": -0.1847701370716095, | |
| "logps/generated": -576.3521728515625, | |
| "logps/real": -470.685302734375, | |
| "loss": 0.4635, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/generated": -0.5276187062263489, | |
| "rewards/margins": 0.5738595724105835, | |
| "rewards/real": 0.04624096304178238, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 42.25, | |
| "learning_rate": 4.0669515669515667e-07, | |
| "logits/generated": 0.25724920630455017, | |
| "logits/real": -0.2856508791446686, | |
| "logps/generated": -558.7901611328125, | |
| "logps/real": -467.52008056640625, | |
| "loss": 0.4516, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.5960500836372375, | |
| "rewards/margins": 0.6805692911148071, | |
| "rewards/real": 0.08451925963163376, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 40.25, | |
| "learning_rate": 4.031339031339031e-07, | |
| "logits/generated": 0.28837472200393677, | |
| "logits/real": -0.19708415865898132, | |
| "logps/generated": -549.651123046875, | |
| "logps/real": -457.85919189453125, | |
| "loss": 0.4424, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -0.6616542935371399, | |
| "rewards/margins": 0.6647108793258667, | |
| "rewards/real": 0.0030566223431378603, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 39.5, | |
| "learning_rate": 3.995726495726496e-07, | |
| "logits/generated": 0.30279049277305603, | |
| "logits/real": -0.18387384712696075, | |
| "logps/generated": -541.563720703125, | |
| "logps/real": -444.663818359375, | |
| "loss": 0.4583, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/generated": -0.5965785384178162, | |
| "rewards/margins": 0.6331891417503357, | |
| "rewards/real": 0.03661050274968147, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 40.5, | |
| "learning_rate": 3.9601139601139597e-07, | |
| "logits/generated": 0.25506392121315, | |
| "logits/real": -0.23414012789726257, | |
| "logps/generated": -574.35595703125, | |
| "logps/real": -450.7875061035156, | |
| "loss": 0.4459, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -0.6340950131416321, | |
| "rewards/margins": 0.6694762110710144, | |
| "rewards/real": 0.0353812575340271, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 37.75, | |
| "learning_rate": 3.924501424501424e-07, | |
| "logits/generated": 0.25567466020584106, | |
| "logits/real": -0.20427341759204865, | |
| "logps/generated": -546.7042236328125, | |
| "logps/real": -421.8016662597656, | |
| "loss": 0.4303, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.5935453176498413, | |
| "rewards/margins": 0.685859203338623, | |
| "rewards/real": 0.09231381118297577, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 41.75, | |
| "learning_rate": 3.888888888888889e-07, | |
| "logits/generated": 0.3079773783683777, | |
| "logits/real": -0.21253804862499237, | |
| "logps/generated": -598.437744140625, | |
| "logps/real": -443.98492431640625, | |
| "loss": 0.4276, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.722990870475769, | |
| "rewards/margins": 0.7524241209030151, | |
| "rewards/real": 0.029433395713567734, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 37.5, | |
| "learning_rate": 3.853276353276353e-07, | |
| "logits/generated": 0.30521565675735474, | |
| "logits/real": -0.1906270682811737, | |
| "logps/generated": -541.72607421875, | |
| "logps/real": -424.0105895996094, | |
| "loss": 0.4269, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/generated": -0.667791485786438, | |
| "rewards/margins": 0.7342725992202759, | |
| "rewards/real": 0.06648121774196625, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 36.75, | |
| "learning_rate": 3.8176638176638176e-07, | |
| "logits/generated": 0.3006458282470703, | |
| "logits/real": -0.2351672649383545, | |
| "logps/generated": -559.7643432617188, | |
| "logps/real": -504.17987060546875, | |
| "loss": 0.4507, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.6496528387069702, | |
| "rewards/margins": 0.67330002784729, | |
| "rewards/real": 0.023647133260965347, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 37.0, | |
| "learning_rate": 3.782051282051282e-07, | |
| "logits/generated": 0.2616468071937561, | |
| "logits/real": -0.2550817131996155, | |
| "logps/generated": -556.2052001953125, | |
| "logps/real": -438.43743896484375, | |
| "loss": 0.4118, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -0.7023671865463257, | |
| "rewards/margins": 0.7656707763671875, | |
| "rewards/real": 0.06330356001853943, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 37.0, | |
| "learning_rate": 3.7464387464387463e-07, | |
| "logits/generated": 0.3042835593223572, | |
| "logits/real": -0.22142143547534943, | |
| "logps/generated": -538.9241333007812, | |
| "logps/real": -484.5042419433594, | |
| "loss": 0.4347, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.7196381092071533, | |
| "rewards/margins": 0.7357234358787537, | |
| "rewards/real": 0.016085291281342506, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 38.75, | |
| "learning_rate": 3.7108262108262107e-07, | |
| "logits/generated": 0.28268715739250183, | |
| "logits/real": -0.2414659708738327, | |
| "logps/generated": -552.683837890625, | |
| "logps/real": -439.206298828125, | |
| "loss": 0.4091, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -0.738116443157196, | |
| "rewards/margins": 0.7844719886779785, | |
| "rewards/real": 0.04635554179549217, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 36.5, | |
| "learning_rate": 3.6752136752136755e-07, | |
| "logits/generated": 0.2755090296268463, | |
| "logits/real": -0.2653561234474182, | |
| "logps/generated": -575.4991455078125, | |
| "logps/real": -458.960693359375, | |
| "loss": 0.4082, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.7462833523750305, | |
| "rewards/margins": 0.7978280782699585, | |
| "rewards/real": 0.05154471844434738, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 40.25, | |
| "learning_rate": 3.6396011396011393e-07, | |
| "logits/generated": 0.3112264573574066, | |
| "logits/real": -0.2405599057674408, | |
| "logps/generated": -537.7279052734375, | |
| "logps/real": -463.67718505859375, | |
| "loss": 0.4195, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.7078372240066528, | |
| "rewards/margins": 0.7539125084877014, | |
| "rewards/real": 0.04607529193162918, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 36.75, | |
| "learning_rate": 3.6039886039886037e-07, | |
| "logits/generated": 0.2953110635280609, | |
| "logits/real": -0.21746914088726044, | |
| "logps/generated": -562.6829833984375, | |
| "logps/real": -432.52105712890625, | |
| "loss": 0.3822, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -0.7816409468650818, | |
| "rewards/margins": 0.8443856239318848, | |
| "rewards/real": 0.06274472177028656, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 36.75, | |
| "learning_rate": 3.5683760683760686e-07, | |
| "logits/generated": 0.29402461647987366, | |
| "logits/real": -0.16522033512592316, | |
| "logps/generated": -568.8770751953125, | |
| "logps/real": -452.81866455078125, | |
| "loss": 0.3823, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -0.8258803486824036, | |
| "rewards/margins": 0.9027735590934753, | |
| "rewards/real": 0.07689327746629715, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 37.5, | |
| "learning_rate": 3.5327635327635324e-07, | |
| "logits/generated": 0.2609436810016632, | |
| "logits/real": -0.23396773636341095, | |
| "logps/generated": -557.7428588867188, | |
| "logps/real": -437.74493408203125, | |
| "loss": 0.4063, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.762947678565979, | |
| "rewards/margins": 0.7624271512031555, | |
| "rewards/real": -0.0005204956978559494, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 39.0, | |
| "learning_rate": 3.497150997150997e-07, | |
| "logits/generated": 0.29128846526145935, | |
| "logits/real": -0.29488933086395264, | |
| "logps/generated": -569.3885498046875, | |
| "logps/real": -493.6559143066406, | |
| "loss": 0.3907, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -0.8542326092720032, | |
| "rewards/margins": 0.8633776903152466, | |
| "rewards/real": 0.009145094081759453, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 40.0, | |
| "learning_rate": 3.461538461538461e-07, | |
| "logits/generated": 0.2510134279727936, | |
| "logits/real": -0.23930807411670685, | |
| "logps/generated": -571.7144775390625, | |
| "logps/real": -420.68890380859375, | |
| "loss": 0.3903, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -0.8466684222221375, | |
| "rewards/margins": 0.8632675409317017, | |
| "rewards/real": 0.016599006950855255, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 34.5, | |
| "learning_rate": 3.425925925925926e-07, | |
| "logits/generated": 0.24713429808616638, | |
| "logits/real": -0.2522990107536316, | |
| "logps/generated": -579.431396484375, | |
| "logps/real": -446.91668701171875, | |
| "loss": 0.4016, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -0.852130115032196, | |
| "rewards/margins": 0.845871090888977, | |
| "rewards/real": -0.006259086541831493, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_logits/generated": 0.25726401805877686, | |
| "eval_logits/real": -0.2516762316226959, | |
| "eval_logps/generated": -568.6871948242188, | |
| "eval_logps/real": -468.50299072265625, | |
| "eval_loss": 0.3913811147212982, | |
| "eval_rewards/accuracies": 0.9319999814033508, | |
| "eval_rewards/generated": -0.8142948746681213, | |
| "eval_rewards/margins": 0.8413105010986328, | |
| "eval_rewards/real": 0.027015656232833862, | |
| "eval_runtime": 262.9656, | |
| "eval_samples_per_second": 7.606, | |
| "eval_steps_per_second": 0.951, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 34.0, | |
| "learning_rate": 3.3903133903133903e-07, | |
| "logits/generated": 0.2464553862810135, | |
| "logits/real": -0.24754126369953156, | |
| "logps/generated": -557.3573608398438, | |
| "logps/real": -447.5314025878906, | |
| "loss": 0.3659, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -0.8741347193717957, | |
| "rewards/margins": 0.9578303098678589, | |
| "rewards/real": 0.08369561284780502, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 36.0, | |
| "learning_rate": 3.354700854700854e-07, | |
| "logits/generated": 0.2558444142341614, | |
| "logits/real": -0.2749106287956238, | |
| "logps/generated": -581.9246826171875, | |
| "logps/real": -470.64117431640625, | |
| "loss": 0.3883, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.870457649230957, | |
| "rewards/margins": 0.9120520353317261, | |
| "rewards/real": 0.04159443825483322, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 40.0, | |
| "learning_rate": 3.319088319088319e-07, | |
| "logits/generated": 0.2201302945613861, | |
| "logits/real": -0.2533838152885437, | |
| "logps/generated": -551.3731689453125, | |
| "logps/real": -479.00115966796875, | |
| "loss": 0.3958, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -0.7957965135574341, | |
| "rewards/margins": 0.8228376507759094, | |
| "rewards/real": 0.027041062712669373, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 38.75, | |
| "learning_rate": 3.2834757834757833e-07, | |
| "logits/generated": 0.2794730067253113, | |
| "logits/real": -0.1945553719997406, | |
| "logps/generated": -570.4600830078125, | |
| "logps/real": -423.16229248046875, | |
| "loss": 0.3755, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -0.8808556795120239, | |
| "rewards/margins": 0.8986592292785645, | |
| "rewards/real": 0.01780361495912075, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 36.25, | |
| "learning_rate": 3.2478632478632476e-07, | |
| "logits/generated": 0.28926241397857666, | |
| "logits/real": -0.2784452438354492, | |
| "logps/generated": -584.1411743164062, | |
| "logps/real": -480.142578125, | |
| "loss": 0.3778, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -0.8571932911872864, | |
| "rewards/margins": 0.9160116910934448, | |
| "rewards/real": 0.058818407356739044, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 34.75, | |
| "learning_rate": 3.212250712250712e-07, | |
| "logits/generated": 0.25457730889320374, | |
| "logits/real": -0.2175012081861496, | |
| "logps/generated": -571.3527221679688, | |
| "logps/real": -437.7066345214844, | |
| "loss": 0.3903, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -0.8724948167800903, | |
| "rewards/margins": 0.9130793809890747, | |
| "rewards/real": 0.04058459773659706, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 35.5, | |
| "learning_rate": 3.176638176638177e-07, | |
| "logits/generated": 0.2662949562072754, | |
| "logits/real": -0.2381666898727417, | |
| "logps/generated": -601.4517211914062, | |
| "logps/real": -483.5282287597656, | |
| "loss": 0.3865, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -0.8890978693962097, | |
| "rewards/margins": 0.8633550405502319, | |
| "rewards/real": -0.02574281021952629, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 35.25, | |
| "learning_rate": 3.1410256410256407e-07, | |
| "logits/generated": 0.25792551040649414, | |
| "logits/real": -0.28233546018600464, | |
| "logps/generated": -575.9357299804688, | |
| "logps/real": -461.888916015625, | |
| "loss": 0.3606, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -0.9487969279289246, | |
| "rewards/margins": 1.0083829164505005, | |
| "rewards/real": 0.05958594009280205, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 35.5, | |
| "learning_rate": 3.1054131054131055e-07, | |
| "logits/generated": 0.24235877394676208, | |
| "logits/real": -0.19490960240364075, | |
| "logps/generated": -574.0829467773438, | |
| "logps/real": -401.38031005859375, | |
| "loss": 0.3805, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -0.8425151705741882, | |
| "rewards/margins": 0.8939135670661926, | |
| "rewards/real": 0.0513983853161335, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 33.25, | |
| "learning_rate": 3.06980056980057e-07, | |
| "logits/generated": 0.2665616571903229, | |
| "logits/real": -0.24914798140525818, | |
| "logps/generated": -547.2540283203125, | |
| "logps/real": -449.8600158691406, | |
| "loss": 0.3741, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.8794205784797668, | |
| "rewards/margins": 0.8983514904975891, | |
| "rewards/real": 0.01893080770969391, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 38.0, | |
| "learning_rate": 3.0341880341880337e-07, | |
| "logits/generated": 0.2690719962120056, | |
| "logits/real": -0.26707571744918823, | |
| "logps/generated": -573.3060913085938, | |
| "logps/real": -484.48492431640625, | |
| "loss": 0.3759, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -0.9073358774185181, | |
| "rewards/margins": 0.927741527557373, | |
| "rewards/real": 0.020405706018209457, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 35.0, | |
| "learning_rate": 2.9985754985754986e-07, | |
| "logits/generated": 0.23270320892333984, | |
| "logits/real": -0.2126883715391159, | |
| "logps/generated": -566.2892456054688, | |
| "logps/real": -420.14471435546875, | |
| "loss": 0.373, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -0.9121645092964172, | |
| "rewards/margins": 0.9226058721542358, | |
| "rewards/real": 0.01044147927314043, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 33.0, | |
| "learning_rate": 2.962962962962963e-07, | |
| "logits/generated": 0.24159935116767883, | |
| "logits/real": -0.26122623682022095, | |
| "logps/generated": -570.7769775390625, | |
| "logps/real": -483.9549865722656, | |
| "loss": 0.3691, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.9642108082771301, | |
| "rewards/margins": 0.9902675747871399, | |
| "rewards/real": 0.0260569266974926, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 36.0, | |
| "learning_rate": 2.927350427350427e-07, | |
| "logits/generated": 0.23782770335674286, | |
| "logits/real": -0.2670837640762329, | |
| "logps/generated": -573.4016723632812, | |
| "logps/real": -469.85491943359375, | |
| "loss": 0.3758, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.9776171445846558, | |
| "rewards/margins": 0.9925469160079956, | |
| "rewards/real": 0.01492965966463089, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 34.0, | |
| "learning_rate": 2.8917378917378916e-07, | |
| "logits/generated": 0.2320900857448578, | |
| "logits/real": -0.18077997863292694, | |
| "logps/generated": -559.6778564453125, | |
| "logps/real": -410.2744140625, | |
| "loss": 0.3682, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -0.9255533218383789, | |
| "rewards/margins": 0.9920336008071899, | |
| "rewards/real": 0.06648023426532745, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 37.25, | |
| "learning_rate": 2.8561253561253565e-07, | |
| "logits/generated": 0.21389861404895782, | |
| "logits/real": -0.26197221875190735, | |
| "logps/generated": -571.7706909179688, | |
| "logps/real": -451.16253662109375, | |
| "loss": 0.3799, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.9532166719436646, | |
| "rewards/margins": 0.9206304550170898, | |
| "rewards/real": -0.03258631005883217, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 36.0, | |
| "learning_rate": 2.8205128205128203e-07, | |
| "logits/generated": 0.22054708003997803, | |
| "logits/real": -0.25541600584983826, | |
| "logps/generated": -549.7349853515625, | |
| "logps/real": -407.8997497558594, | |
| "loss": 0.3712, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.875189483165741, | |
| "rewards/margins": 0.9158505201339722, | |
| "rewards/real": 0.04066096618771553, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 35.25, | |
| "learning_rate": 2.7849002849002846e-07, | |
| "logits/generated": 0.24272923171520233, | |
| "logits/real": -0.2981414198875427, | |
| "logps/generated": -567.8425903320312, | |
| "logps/real": -499.34588623046875, | |
| "loss": 0.3782, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/generated": -0.8826544880867004, | |
| "rewards/margins": 0.8991689682006836, | |
| "rewards/real": 0.01651454158127308, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 36.25, | |
| "learning_rate": 2.749287749287749e-07, | |
| "logits/generated": 0.25423914194107056, | |
| "logits/real": -0.27515119314193726, | |
| "logps/generated": -561.5721435546875, | |
| "logps/real": -458.6827087402344, | |
| "loss": 0.3479, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -0.976852536201477, | |
| "rewards/margins": 1.0118058919906616, | |
| "rewards/real": 0.034953318536281586, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 31.125, | |
| "learning_rate": 2.7136752136752133e-07, | |
| "logits/generated": 0.23657219111919403, | |
| "logits/real": -0.28398722410202026, | |
| "logps/generated": -593.6157836914062, | |
| "logps/real": -420.7015686035156, | |
| "loss": 0.3569, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -0.9960700869560242, | |
| "rewards/margins": 1.0824501514434814, | |
| "rewards/real": 0.08638016134500504, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 34.75, | |
| "learning_rate": 2.678062678062678e-07, | |
| "logits/generated": 0.24148711562156677, | |
| "logits/real": -0.21437129378318787, | |
| "logps/generated": -577.3068237304688, | |
| "logps/real": -450.6416015625, | |
| "loss": 0.3691, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.9539799690246582, | |
| "rewards/margins": 1.0042264461517334, | |
| "rewards/real": 0.05024648830294609, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 34.0, | |
| "learning_rate": 2.642450142450142e-07, | |
| "logits/generated": 0.27527815103530884, | |
| "logits/real": -0.2802174687385559, | |
| "logps/generated": -584.6751708984375, | |
| "logps/real": -474.64678955078125, | |
| "loss": 0.3432, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0197203159332275, | |
| "rewards/margins": 1.0799630880355835, | |
| "rewards/real": 0.06024279445409775, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 34.25, | |
| "learning_rate": 2.606837606837607e-07, | |
| "logits/generated": 0.24102333188056946, | |
| "logits/real": -0.2790641188621521, | |
| "logps/generated": -580.9298706054688, | |
| "logps/real": -423.9723205566406, | |
| "loss": 0.3673, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -0.9787546396255493, | |
| "rewards/margins": 1.0065668821334839, | |
| "rewards/real": 0.027812320739030838, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 34.75, | |
| "learning_rate": 2.571225071225071e-07, | |
| "logits/generated": 0.2540944218635559, | |
| "logits/real": -0.19810980558395386, | |
| "logps/generated": -562.8670654296875, | |
| "logps/real": -426.3075256347656, | |
| "loss": 0.35, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.0469673871994019, | |
| "rewards/margins": 1.0949853658676147, | |
| "rewards/real": 0.04801792651414871, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 33.0, | |
| "learning_rate": 2.5356125356125355e-07, | |
| "logits/generated": 0.20098896324634552, | |
| "logits/real": -0.24582453072071075, | |
| "logps/generated": -573.8992309570312, | |
| "logps/real": -420.2386779785156, | |
| "loss": 0.3456, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0094668865203857, | |
| "rewards/margins": 1.0302053689956665, | |
| "rewards/real": 0.020738471299409866, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 36.25, | |
| "learning_rate": 2.5e-07, | |
| "logits/generated": 0.2519596219062805, | |
| "logits/real": -0.27877140045166016, | |
| "logps/generated": -565.7482299804688, | |
| "logps/real": -534.8162841796875, | |
| "loss": 0.3519, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -0.9428899884223938, | |
| "rewards/margins": 0.976216197013855, | |
| "rewards/real": 0.033326275646686554, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 36.75, | |
| "learning_rate": 2.464387464387464e-07, | |
| "logits/generated": 0.23958304524421692, | |
| "logits/real": -0.3024999499320984, | |
| "logps/generated": -558.3704223632812, | |
| "logps/real": -470.7825622558594, | |
| "loss": 0.3492, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0124276876449585, | |
| "rewards/margins": 1.0718011856079102, | |
| "rewards/real": 0.059373389929533005, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 30.75, | |
| "learning_rate": 2.4287749287749286e-07, | |
| "logits/generated": 0.2602802515029907, | |
| "logits/real": -0.28142985701560974, | |
| "logps/generated": -619.5274047851562, | |
| "logps/real": -502.71832275390625, | |
| "loss": 0.3466, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/generated": -1.0051062107086182, | |
| "rewards/margins": 1.0426944494247437, | |
| "rewards/real": 0.037588391453027725, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.393162393162393e-07, | |
| "logits/generated": 0.21535196900367737, | |
| "logits/real": -0.2805056869983673, | |
| "logps/generated": -587.7852172851562, | |
| "logps/real": -429.0276794433594, | |
| "loss": 0.3489, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -0.9763972163200378, | |
| "rewards/margins": 0.9719440340995789, | |
| "rewards/real": -0.004453244619071484, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 33.75, | |
| "learning_rate": 2.3575498575498575e-07, | |
| "logits/generated": 0.22773954272270203, | |
| "logits/real": -0.2382933795452118, | |
| "logps/generated": -591.6124877929688, | |
| "logps/real": -456.0254821777344, | |
| "loss": 0.3504, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.0408380031585693, | |
| "rewards/margins": 1.030290961265564, | |
| "rewards/real": -0.010547079145908356, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 35.25, | |
| "learning_rate": 2.3219373219373216e-07, | |
| "logits/generated": 0.22827064990997314, | |
| "logits/real": -0.3192758560180664, | |
| "logps/generated": -563.3525390625, | |
| "logps/real": -482.61541748046875, | |
| "loss": 0.3454, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.9959697723388672, | |
| "rewards/margins": 1.0202291011810303, | |
| "rewards/real": 0.024259375408291817, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 36.25, | |
| "learning_rate": 2.2863247863247862e-07, | |
| "logits/generated": 0.2440454512834549, | |
| "logits/real": -0.22086623311042786, | |
| "logps/generated": -557.6519165039062, | |
| "logps/real": -446.55230712890625, | |
| "loss": 0.3376, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.0488998889923096, | |
| "rewards/margins": 1.0662249326705933, | |
| "rewards/real": 0.017325039952993393, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 34.5, | |
| "learning_rate": 2.2507122507122505e-07, | |
| "logits/generated": 0.21471574902534485, | |
| "logits/real": -0.2694615423679352, | |
| "logps/generated": -556.4073486328125, | |
| "logps/real": -466.55120849609375, | |
| "loss": 0.3528, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -1.063711404800415, | |
| "rewards/margins": 1.067757487297058, | |
| "rewards/real": 0.004046155605465174, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 35.75, | |
| "learning_rate": 2.215099715099715e-07, | |
| "logits/generated": 0.223112553358078, | |
| "logits/real": -0.2675575315952301, | |
| "logps/generated": -554.9754028320312, | |
| "logps/real": -409.7915954589844, | |
| "loss": 0.3554, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.9735461473464966, | |
| "rewards/margins": 0.9401170015335083, | |
| "rewards/real": -0.03342916816473007, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 32.75, | |
| "learning_rate": 2.1794871794871795e-07, | |
| "logits/generated": 0.24918599426746368, | |
| "logits/real": -0.28682878613471985, | |
| "logps/generated": -571.9830322265625, | |
| "logps/real": -467.03125, | |
| "loss": 0.3409, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.0399566888809204, | |
| "rewards/margins": 1.0961642265319824, | |
| "rewards/real": 0.056207604706287384, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 30.875, | |
| "learning_rate": 2.1438746438746438e-07, | |
| "logits/generated": 0.2370508462190628, | |
| "logits/real": -0.285112202167511, | |
| "logps/generated": -598.0405883789062, | |
| "logps/real": -468.107421875, | |
| "loss": 0.3299, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1196238994598389, | |
| "rewards/margins": 1.1285395622253418, | |
| "rewards/real": 0.008915687911212444, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.1082621082621082e-07, | |
| "logits/generated": 0.21018759906291962, | |
| "logits/real": -0.2482105940580368, | |
| "logps/generated": -559.3743896484375, | |
| "logps/real": -418.2901916503906, | |
| "loss": 0.354, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.0670310258865356, | |
| "rewards/margins": 1.0917372703552246, | |
| "rewards/real": 0.02470635250210762, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 32.5, | |
| "learning_rate": 2.0726495726495728e-07, | |
| "logits/generated": 0.2351360023021698, | |
| "logits/real": -0.3030211925506592, | |
| "logps/generated": -561.6700439453125, | |
| "logps/real": -508.479248046875, | |
| "loss": 0.3462, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -1.0717344284057617, | |
| "rewards/margins": 1.0173569917678833, | |
| "rewards/real": -0.05437753349542618, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.0370370370370369e-07, | |
| "logits/generated": 0.21695998311042786, | |
| "logits/real": -0.262247234582901, | |
| "logps/generated": -560.5780029296875, | |
| "logps/real": -447.3353576660156, | |
| "loss": 0.346, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.0143311023712158, | |
| "rewards/margins": 1.0173934698104858, | |
| "rewards/real": 0.0030622382182627916, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 33.5, | |
| "learning_rate": 2.0014245014245012e-07, | |
| "logits/generated": 0.24932141602039337, | |
| "logits/real": -0.28666001558303833, | |
| "logps/generated": -548.8845825195312, | |
| "logps/real": -473.7200622558594, | |
| "loss": 0.359, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -0.9833143353462219, | |
| "rewards/margins": 0.9953653216362, | |
| "rewards/real": 0.012050976976752281, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 30.5, | |
| "learning_rate": 1.9658119658119656e-07, | |
| "logits/generated": 0.2177625447511673, | |
| "logits/real": -0.27629366517066956, | |
| "logps/generated": -541.3976440429688, | |
| "logps/real": -459.02227783203125, | |
| "loss": 0.3403, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -0.9844304919242859, | |
| "rewards/margins": 1.0443334579467773, | |
| "rewards/real": 0.0599028654396534, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 33.25, | |
| "learning_rate": 1.9301994301994302e-07, | |
| "logits/generated": 0.22767607867717743, | |
| "logits/real": -0.25904038548469543, | |
| "logps/generated": -568.2036743164062, | |
| "logps/real": -443.6368713378906, | |
| "loss": 0.3417, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0040171146392822, | |
| "rewards/margins": 1.078296184539795, | |
| "rewards/real": 0.07427913695573807, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 34.75, | |
| "learning_rate": 1.8945868945868945e-07, | |
| "logits/generated": 0.2245851755142212, | |
| "logits/real": -0.3053443431854248, | |
| "logps/generated": -569.513671875, | |
| "logps/real": -470.54547119140625, | |
| "loss": 0.3494, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -1.0781540870666504, | |
| "rewards/margins": 1.0763808488845825, | |
| "rewards/real": -0.0017731055850163102, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 33.25, | |
| "learning_rate": 1.8589743589743588e-07, | |
| "logits/generated": 0.228353351354599, | |
| "logits/real": -0.27608901262283325, | |
| "logps/generated": -549.1878662109375, | |
| "logps/real": -457.79681396484375, | |
| "loss": 0.3254, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0618976354599, | |
| "rewards/margins": 1.0646899938583374, | |
| "rewards/real": 0.0027922815643250942, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 32.5, | |
| "learning_rate": 1.8233618233618234e-07, | |
| "logits/generated": 0.24573054909706116, | |
| "logits/real": -0.24587111175060272, | |
| "logps/generated": -527.3595581054688, | |
| "logps/real": -461.79351806640625, | |
| "loss": 0.3365, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -0.9843952059745789, | |
| "rewards/margins": 1.0428409576416016, | |
| "rewards/real": 0.05844569206237793, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 30.5, | |
| "learning_rate": 1.7877492877492878e-07, | |
| "logits/generated": 0.2456216812133789, | |
| "logits/real": -0.3523867428302765, | |
| "logps/generated": -584.2765502929688, | |
| "logps/real": -485.2999572753906, | |
| "loss": 0.3442, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/generated": -1.1297935247421265, | |
| "rewards/margins": 1.1257268190383911, | |
| "rewards/real": -0.0040667857974767685, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 32.75, | |
| "learning_rate": 1.752136752136752e-07, | |
| "logits/generated": 0.24020275473594666, | |
| "logits/real": -0.2194860428571701, | |
| "logps/generated": -575.4654541015625, | |
| "logps/real": -414.576416015625, | |
| "loss": 0.3328, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1157715320587158, | |
| "rewards/margins": 1.0961633920669556, | |
| "rewards/real": -0.01960797980427742, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 31.75, | |
| "learning_rate": 1.7165242165242165e-07, | |
| "logits/generated": 0.20955972373485565, | |
| "logits/real": -0.25930672883987427, | |
| "logps/generated": -591.9100341796875, | |
| "logps/real": -403.3531799316406, | |
| "loss": 0.3286, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -1.076982855796814, | |
| "rewards/margins": 1.1210120916366577, | |
| "rewards/real": 0.044029366225004196, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 31.375, | |
| "learning_rate": 1.6809116809116808e-07, | |
| "logits/generated": 0.18421868979930878, | |
| "logits/real": -0.29251617193222046, | |
| "logps/generated": -552.1446533203125, | |
| "logps/real": -425.1080627441406, | |
| "loss": 0.3362, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -0.986895740032196, | |
| "rewards/margins": 1.0399030447006226, | |
| "rewards/real": 0.05300729721784592, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 32.5, | |
| "learning_rate": 1.6452991452991452e-07, | |
| "logits/generated": 0.22791898250579834, | |
| "logits/real": -0.28665608167648315, | |
| "logps/generated": -598.5328369140625, | |
| "logps/real": -407.1868591308594, | |
| "loss": 0.3265, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.205278754234314, | |
| "rewards/margins": 1.2098186016082764, | |
| "rewards/real": 0.004539764020591974, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 31.625, | |
| "learning_rate": 1.6096866096866095e-07, | |
| "logits/generated": 0.22963444888591766, | |
| "logits/real": -0.24107995629310608, | |
| "logps/generated": -566.4512329101562, | |
| "logps/real": -450.96978759765625, | |
| "loss": 0.3351, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.081211805343628, | |
| "rewards/margins": 1.0905853509902954, | |
| "rewards/real": 0.009373527020215988, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 34.0, | |
| "learning_rate": 1.574074074074074e-07, | |
| "logits/generated": 0.24042251706123352, | |
| "logits/real": -0.21857628226280212, | |
| "logps/generated": -569.6424560546875, | |
| "logps/real": -448.65948486328125, | |
| "loss": 0.331, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.0992162227630615, | |
| "rewards/margins": 1.157705545425415, | |
| "rewards/real": 0.058489274233579636, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 33.25, | |
| "learning_rate": 1.5384615384615385e-07, | |
| "logits/generated": 0.1823839247226715, | |
| "logits/real": -0.2623021900653839, | |
| "logps/generated": -577.726318359375, | |
| "logps/real": -445.66534423828125, | |
| "loss": 0.3352, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.0678300857543945, | |
| "rewards/margins": 1.0674610137939453, | |
| "rewards/real": -0.000368914392311126, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 30.75, | |
| "learning_rate": 1.5028490028490028e-07, | |
| "logits/generated": 0.2117844521999359, | |
| "logits/real": -0.2711123526096344, | |
| "logps/generated": -577.0066528320312, | |
| "logps/real": -406.86175537109375, | |
| "loss": 0.3154, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/generated": -1.0694202184677124, | |
| "rewards/margins": 1.1183080673217773, | |
| "rewards/real": 0.04888775572180748, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 32.75, | |
| "learning_rate": 1.4672364672364671e-07, | |
| "logits/generated": 0.2499610185623169, | |
| "logits/real": -0.2550189197063446, | |
| "logps/generated": -583.8297119140625, | |
| "logps/real": -480.6053771972656, | |
| "loss": 0.3408, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.01523756980896, | |
| "rewards/margins": 1.0397374629974365, | |
| "rewards/real": 0.024500016123056412, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 32.5, | |
| "learning_rate": 1.4316239316239315e-07, | |
| "logits/generated": 0.1771518439054489, | |
| "logits/real": -0.3018808662891388, | |
| "logps/generated": -560.4537963867188, | |
| "logps/real": -397.22137451171875, | |
| "loss": 0.3314, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/generated": -1.1572097539901733, | |
| "rewards/margins": 1.1902085542678833, | |
| "rewards/real": 0.03299868851900101, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 31.375, | |
| "learning_rate": 1.3960113960113958e-07, | |
| "logits/generated": 0.192919060587883, | |
| "logits/real": -0.31041043996810913, | |
| "logps/generated": -532.7413330078125, | |
| "logps/real": -462.16693115234375, | |
| "loss": 0.3311, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.1133582592010498, | |
| "rewards/margins": 1.1442750692367554, | |
| "rewards/real": 0.030916890129446983, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 32.75, | |
| "learning_rate": 1.3603988603988604e-07, | |
| "logits/generated": 0.20842699706554413, | |
| "logits/real": -0.30351361632347107, | |
| "logps/generated": -582.7373657226562, | |
| "logps/real": -480.19073486328125, | |
| "loss": 0.3336, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.1188085079193115, | |
| "rewards/margins": 1.1176929473876953, | |
| "rewards/real": -0.0011154465610161424, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 31.125, | |
| "learning_rate": 1.3247863247863248e-07, | |
| "logits/generated": 0.1861368715763092, | |
| "logits/real": -0.2930435240268707, | |
| "logps/generated": -560.4434814453125, | |
| "logps/real": -442.20574951171875, | |
| "loss": 0.3037, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/generated": -1.1219825744628906, | |
| "rewards/margins": 1.1991544961929321, | |
| "rewards/real": 0.0771719291806221, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 32.5, | |
| "learning_rate": 1.289173789173789e-07, | |
| "logits/generated": 0.2440781146287918, | |
| "logits/real": -0.2519104480743408, | |
| "logps/generated": -577.5955810546875, | |
| "logps/real": -426.29083251953125, | |
| "loss": 0.3451, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.0716168880462646, | |
| "rewards/margins": 1.0959482192993164, | |
| "rewards/real": 0.024331340566277504, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_logits/generated": 0.22350193560123444, | |
| "eval_logits/real": -0.28475213050842285, | |
| "eval_logps/generated": -571.62451171875, | |
| "eval_logps/real": -468.817138671875, | |
| "eval_loss": 0.3355465829372406, | |
| "eval_rewards/accuracies": 0.9599999785423279, | |
| "eval_rewards/generated": -1.108030080795288, | |
| "eval_rewards/margins": 1.1036338806152344, | |
| "eval_rewards/real": -0.004396026488393545, | |
| "eval_runtime": 262.6885, | |
| "eval_samples_per_second": 7.614, | |
| "eval_steps_per_second": 0.952, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 31.25, | |
| "learning_rate": 1.2535612535612535e-07, | |
| "logits/generated": 0.26258888840675354, | |
| "logits/real": -0.27824801206588745, | |
| "logps/generated": -587.973876953125, | |
| "logps/real": -470.12603759765625, | |
| "loss": 0.3066, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.2446457147598267, | |
| "rewards/margins": 1.2537733316421509, | |
| "rewards/real": 0.009127420373260975, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 34.0, | |
| "learning_rate": 1.2179487179487178e-07, | |
| "logits/generated": 0.22428826987743378, | |
| "logits/real": -0.24908749759197235, | |
| "logps/generated": -574.6671752929688, | |
| "logps/real": -423.6082458496094, | |
| "loss": 0.3503, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.131711721420288, | |
| "rewards/margins": 1.1664810180664062, | |
| "rewards/real": 0.03476935625076294, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 36.0, | |
| "learning_rate": 1.1823361823361823e-07, | |
| "logits/generated": 0.2149450033903122, | |
| "logits/real": -0.3522172272205353, | |
| "logps/generated": -569.8680419921875, | |
| "logps/real": -522.5927734375, | |
| "loss": 0.3041, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/generated": -1.102595567703247, | |
| "rewards/margins": 1.1690142154693604, | |
| "rewards/real": 0.06641869246959686, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 34.0, | |
| "learning_rate": 1.1467236467236467e-07, | |
| "logits/generated": 0.21769335865974426, | |
| "logits/real": -0.3188803493976593, | |
| "logps/generated": -525.9594116210938, | |
| "logps/real": -459.16900634765625, | |
| "loss": 0.3244, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/generated": -1.1512959003448486, | |
| "rewards/margins": 1.1399847269058228, | |
| "rewards/real": -0.011311108246445656, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 37.25, | |
| "learning_rate": 1.111111111111111e-07, | |
| "logits/generated": 0.1936269998550415, | |
| "logits/real": -0.2803877294063568, | |
| "logps/generated": -540.1295166015625, | |
| "logps/real": -444.78375244140625, | |
| "loss": 0.3322, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -1.0809428691864014, | |
| "rewards/margins": 1.1129690408706665, | |
| "rewards/real": 0.03202588111162186, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 29.625, | |
| "learning_rate": 1.0754985754985754e-07, | |
| "logits/generated": 0.19856727123260498, | |
| "logits/real": -0.27393585443496704, | |
| "logps/generated": -560.6895751953125, | |
| "logps/real": -426.6712951660156, | |
| "loss": 0.3194, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.1213538646697998, | |
| "rewards/margins": 1.142456293106079, | |
| "rewards/real": 0.021102434024214745, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 31.75, | |
| "learning_rate": 1.0398860398860399e-07, | |
| "logits/generated": 0.21278016269207, | |
| "logits/real": -0.24991516768932343, | |
| "logps/generated": -587.5706176757812, | |
| "logps/real": -404.28033447265625, | |
| "loss": 0.3189, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.1712287664413452, | |
| "rewards/margins": 1.2088580131530762, | |
| "rewards/real": 0.037629302591085434, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 31.75, | |
| "learning_rate": 1.0042735042735042e-07, | |
| "logits/generated": 0.22561617195606232, | |
| "logits/real": -0.20534536242485046, | |
| "logps/generated": -575.3756713867188, | |
| "logps/real": -442.997802734375, | |
| "loss": 0.3369, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/generated": -1.1977014541625977, | |
| "rewards/margins": 1.1940991878509521, | |
| "rewards/real": -0.003602252807468176, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 31.875, | |
| "learning_rate": 9.686609686609686e-08, | |
| "logits/generated": 0.2502003610134125, | |
| "logits/real": -0.2464480847120285, | |
| "logps/generated": -574.8699340820312, | |
| "logps/real": -462.56134033203125, | |
| "loss": 0.3468, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -1.0637601613998413, | |
| "rewards/margins": 1.0312858819961548, | |
| "rewards/real": -0.032474346458911896, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 29.875, | |
| "learning_rate": 9.33048433048433e-08, | |
| "logits/generated": 0.2645814120769501, | |
| "logits/real": -0.2936360836029053, | |
| "logps/generated": -551.9849243164062, | |
| "logps/real": -463.1893005371094, | |
| "loss": 0.3202, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -1.102562665939331, | |
| "rewards/margins": 1.100036382675171, | |
| "rewards/real": -0.002526202006265521, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 33.25, | |
| "learning_rate": 8.974358974358974e-08, | |
| "logits/generated": 0.24026402831077576, | |
| "logits/real": -0.24332275986671448, | |
| "logps/generated": -586.1027221679688, | |
| "logps/real": -457.7566833496094, | |
| "loss": 0.3439, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -1.0856201648712158, | |
| "rewards/margins": 1.0498621463775635, | |
| "rewards/real": -0.035757891833782196, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 32.75, | |
| "learning_rate": 8.618233618233619e-08, | |
| "logits/generated": 0.22486528754234314, | |
| "logits/real": -0.33010414242744446, | |
| "logps/generated": -575.5369262695312, | |
| "logps/real": -444.61712646484375, | |
| "loss": 0.3306, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1358319520950317, | |
| "rewards/margins": 1.1036947965621948, | |
| "rewards/real": -0.03213699907064438, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 35.0, | |
| "learning_rate": 8.262108262108261e-08, | |
| "logits/generated": 0.22603563964366913, | |
| "logits/real": -0.2633499503135681, | |
| "logps/generated": -588.1068725585938, | |
| "logps/real": -409.90362548828125, | |
| "loss": 0.3222, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.1506733894348145, | |
| "rewards/margins": 1.1825590133666992, | |
| "rewards/real": 0.03188558295369148, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 32.0, | |
| "learning_rate": 7.905982905982906e-08, | |
| "logits/generated": 0.2575734257698059, | |
| "logits/real": -0.26269882917404175, | |
| "logps/generated": -613.2880859375, | |
| "logps/real": -436.75689697265625, | |
| "loss": 0.3172, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/generated": -1.1868383884429932, | |
| "rewards/margins": 1.1978209018707275, | |
| "rewards/real": 0.010982497595250607, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 36.5, | |
| "learning_rate": 7.549857549857549e-08, | |
| "logits/generated": 0.21627910435199738, | |
| "logits/real": -0.31892699003219604, | |
| "logps/generated": -590.7273559570312, | |
| "logps/real": -460.1044921875, | |
| "loss": 0.3361, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.1508667469024658, | |
| "rewards/margins": 1.1213773488998413, | |
| "rewards/real": -0.029489323496818542, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 30.5, | |
| "learning_rate": 7.193732193732194e-08, | |
| "logits/generated": 0.2537713944911957, | |
| "logits/real": -0.2855969965457916, | |
| "logps/generated": -577.8758544921875, | |
| "logps/real": -475.7315368652344, | |
| "loss": 0.3277, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/generated": -1.1186813116073608, | |
| "rewards/margins": 1.1131179332733154, | |
| "rewards/real": -0.005563369486480951, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 31.25, | |
| "learning_rate": 6.837606837606839e-08, | |
| "logits/generated": 0.20594389736652374, | |
| "logits/real": -0.31517493724823, | |
| "logps/generated": -553.8355712890625, | |
| "logps/real": -434.3343811035156, | |
| "loss": 0.3286, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.1672719717025757, | |
| "rewards/margins": 1.2395508289337158, | |
| "rewards/real": 0.07227896898984909, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 32.0, | |
| "learning_rate": 6.481481481481481e-08, | |
| "logits/generated": 0.205632284283638, | |
| "logits/real": -0.255433589220047, | |
| "logps/generated": -570.3898315429688, | |
| "logps/real": -464.691650390625, | |
| "loss": 0.3308, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.0862834453582764, | |
| "rewards/margins": 1.1157293319702148, | |
| "rewards/real": 0.029445823282003403, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 33.75, | |
| "learning_rate": 6.125356125356125e-08, | |
| "logits/generated": 0.23636643588542938, | |
| "logits/real": -0.2590656280517578, | |
| "logps/generated": -598.99755859375, | |
| "logps/real": -467.8699645996094, | |
| "loss": 0.3365, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.104642629623413, | |
| "rewards/margins": 1.1386440992355347, | |
| "rewards/real": 0.034001342952251434, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 33.5, | |
| "learning_rate": 5.7692307692307695e-08, | |
| "logits/generated": 0.25425681471824646, | |
| "logits/real": -0.2653648853302002, | |
| "logps/generated": -569.2490844726562, | |
| "logps/real": -475.88763427734375, | |
| "loss": 0.3354, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1180486679077148, | |
| "rewards/margins": 1.0991861820220947, | |
| "rewards/real": -0.018862556666135788, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 30.875, | |
| "learning_rate": 5.413105413105413e-08, | |
| "logits/generated": 0.22857370972633362, | |
| "logits/real": -0.22129984200000763, | |
| "logps/generated": -563.016357421875, | |
| "logps/real": -403.68597412109375, | |
| "loss": 0.3157, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.0614380836486816, | |
| "rewards/margins": 1.1257336139678955, | |
| "rewards/real": 0.06429564207792282, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 36.75, | |
| "learning_rate": 5.056980056980057e-08, | |
| "logits/generated": 0.21650013327598572, | |
| "logits/real": -0.3323083817958832, | |
| "logps/generated": -581.8919677734375, | |
| "logps/real": -487.620361328125, | |
| "loss": 0.3226, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/generated": -1.2059048414230347, | |
| "rewards/margins": 1.1841565370559692, | |
| "rewards/real": -0.021748319268226624, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 30.875, | |
| "learning_rate": 4.7008547008547005e-08, | |
| "logits/generated": 0.2445484846830368, | |
| "logits/real": -0.31186121702194214, | |
| "logps/generated": -557.0134887695312, | |
| "logps/real": -493.17230224609375, | |
| "loss": 0.3212, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1671897172927856, | |
| "rewards/margins": 1.1586859226226807, | |
| "rewards/real": -0.00850372202694416, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 34.25, | |
| "learning_rate": 4.3447293447293445e-08, | |
| "logits/generated": 0.21253076195716858, | |
| "logits/real": -0.2138686180114746, | |
| "logps/generated": -586.7265625, | |
| "logps/real": -408.2170104980469, | |
| "loss": 0.3419, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/generated": -1.1229650974273682, | |
| "rewards/margins": 1.1265299320220947, | |
| "rewards/real": 0.0035647773183882236, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 29.75, | |
| "learning_rate": 3.9886039886039886e-08, | |
| "logits/generated": 0.18125857412815094, | |
| "logits/real": -0.32901662588119507, | |
| "logps/generated": -537.9093017578125, | |
| "logps/real": -448.9685974121094, | |
| "loss": 0.3366, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.0713820457458496, | |
| "rewards/margins": 1.105197548866272, | |
| "rewards/real": 0.033815376460552216, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 32.25, | |
| "learning_rate": 3.632478632478633e-08, | |
| "logits/generated": 0.21634677052497864, | |
| "logits/real": -0.30626875162124634, | |
| "logps/generated": -582.5755004882812, | |
| "logps/real": -460.1416931152344, | |
| "loss": 0.3136, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1917552947998047, | |
| "rewards/margins": 1.2281749248504639, | |
| "rewards/real": 0.03641948103904724, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 30.625, | |
| "learning_rate": 3.276353276353276e-08, | |
| "logits/generated": 0.2181752622127533, | |
| "logits/real": -0.34954366087913513, | |
| "logps/generated": -566.8768310546875, | |
| "logps/real": -490.39385986328125, | |
| "loss": 0.3149, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.2017440795898438, | |
| "rewards/margins": 1.2231340408325195, | |
| "rewards/real": 0.0213897917419672, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 30.75, | |
| "learning_rate": 2.92022792022792e-08, | |
| "logits/generated": 0.250019907951355, | |
| "logits/real": -0.23180410265922546, | |
| "logps/generated": -569.7205810546875, | |
| "logps/real": -436.3052673339844, | |
| "loss": 0.3193, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/generated": -1.0622925758361816, | |
| "rewards/margins": 1.074755311012268, | |
| "rewards/real": 0.012462759390473366, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 32.0, | |
| "learning_rate": 2.564102564102564e-08, | |
| "logits/generated": 0.2107166051864624, | |
| "logits/real": -0.28946200013160706, | |
| "logps/generated": -562.4656982421875, | |
| "logps/real": -423.35076904296875, | |
| "loss": 0.3288, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1599444150924683, | |
| "rewards/margins": 1.1884777545928955, | |
| "rewards/real": 0.02853330597281456, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.2079772079772077e-08, | |
| "logits/generated": 0.2310904562473297, | |
| "logits/real": -0.2765614092350006, | |
| "logps/generated": -604.2635498046875, | |
| "logps/real": -450.979248046875, | |
| "loss": 0.3074, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.2296929359436035, | |
| "rewards/margins": 1.2464648485183716, | |
| "rewards/real": 0.016771936789155006, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 31.125, | |
| "learning_rate": 1.8518518518518518e-08, | |
| "logits/generated": 0.17430028319358826, | |
| "logits/real": -0.33389025926589966, | |
| "logps/generated": -549.4244995117188, | |
| "logps/real": -464.75653076171875, | |
| "loss": 0.3085, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.1594291925430298, | |
| "rewards/margins": 1.2221739292144775, | |
| "rewards/real": 0.0627446323633194, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 32.0, | |
| "learning_rate": 1.4957264957264956e-08, | |
| "logits/generated": 0.24278989434242249, | |
| "logits/real": -0.26737385988235474, | |
| "logps/generated": -573.1634521484375, | |
| "logps/real": -469.896484375, | |
| "loss": 0.3263, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/generated": -1.049228310585022, | |
| "rewards/margins": 1.1048952341079712, | |
| "rewards/real": 0.055667001754045486, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 34.5, | |
| "learning_rate": 1.1396011396011397e-08, | |
| "logits/generated": 0.1711110770702362, | |
| "logits/real": -0.35616591572761536, | |
| "logps/generated": -552.7576293945312, | |
| "logps/real": -433.0294494628906, | |
| "loss": 0.3352, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/generated": -1.0972100496292114, | |
| "rewards/margins": 1.1236265897750854, | |
| "rewards/real": 0.02641637995839119, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 33.75, | |
| "learning_rate": 7.834757834757834e-09, | |
| "logits/generated": 0.3013153672218323, | |
| "logits/real": -0.22638103365898132, | |
| "logps/generated": -591.3074951171875, | |
| "logps/real": -471.1341857910156, | |
| "loss": 0.3269, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/generated": -1.1128944158554077, | |
| "rewards/margins": 1.088680624961853, | |
| "rewards/real": -0.02421378344297409, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 31.0, | |
| "learning_rate": 4.273504273504274e-09, | |
| "logits/generated": 0.2685278058052063, | |
| "logits/real": -0.22944375872612, | |
| "logps/generated": -570.2601928710938, | |
| "logps/real": -418.1946716308594, | |
| "loss": 0.3241, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/generated": -1.138912320137024, | |
| "rewards/margins": 1.1833699941635132, | |
| "rewards/real": 0.04445788636803627, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 33.25, | |
| "learning_rate": 7.122507122507123e-10, | |
| "logits/generated": 0.23055453598499298, | |
| "logits/real": -0.3259603977203369, | |
| "logps/generated": -538.0984497070312, | |
| "logps/real": -500.5188903808594, | |
| "loss": 0.3448, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/generated": -1.0663647651672363, | |
| "rewards/margins": 1.0742255449295044, | |
| "rewards/real": 0.007860781624913216, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 781, | |
| "total_flos": 0.0, | |
| "train_loss": 0.41590739681687156, | |
| "train_runtime": 9143.8407, | |
| "train_samples_per_second": 5.468, | |
| "train_steps_per_second": 0.085 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 781, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
