- DanceGRPO
- DanceGRPO_orig
- ImplicitReward
- MinMaxGRPO
- ckpts
- min_max
- odd_poly_rank_based_no_scale_no_tanh
- oddpolyreward
- or_br_ratio
- org_0p2
- original
- rmin_rmax_alpha_grpo_6_38_0.3_0.1_2025.11.10_09.53.48
- rmin_rmax_alpha_grpo_6_38_0.3_0.1_2025.11.10_16.00.34
- rmin_rmax_alpha_grpo_ass_0.15_0.35_0.3_0.1_2025.11.11_08.55.15
- rmin_rmax_alpha_grpo_ass_0.15_0.35_0.3_0.1_2025.11.11_09.24.16
- rmin_rmax_symmetric_alpha
- rmin_rmax_symmetric_alpha_increasing
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.1_2025.11.09_17.17.17
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.3_2025.11.09_18.28.50
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.45_2025.11.09_20.33.10
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.4_{beta}_2025.11.10_01.29.57
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.5_2025.11.09_19.05.44
- rmin_rmax_symmetric_alpha_increasing_grpo_alpha_-1_+1_0.6_2025.11.09_20.17.22
- unified
-
1.52 kB
-
24 Bytes