Update README.md
Browse files
README.md
CHANGED
|
@@ -165,6 +165,7 @@ v2:
|
|
| 165 |
* Framework: https://github.com/tatsu-lab/alpaca_eval
|
| 166 |
* Evaluator: alpaca_eval_cot_gpt4_turbo_fn
|
| 167 |
|
|
|
|
| 168 |
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
| 169 |
|-----|-----|-----|-----|-----|
|
| 170 |
|chatgpt_4_turbo | 76.04 | 90.00 | 1.46 | 1270 |
|
|
@@ -177,4 +178,12 @@ v2:
|
|
| 177 |
|saiga_llama3_8b, v4 | 43.64 | 65.90 | 2.31 | 1200 |
|
| 178 |
|saiga_llama3_8b, v3 | 36.97 | 61.08 | 2.38 | 1162 |
|
| 179 |
|saiga_llama3_8b, v2 | 33.07 | 48.19 | 2.45 | 1166 |
|
| 180 |
-
|saiga_mistral_7b | 23.38 | 35.99 | 2.34 | 949 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
* Framework: https://github.com/tatsu-lab/alpaca_eval
|
| 166 |
* Evaluator: alpaca_eval_cot_gpt4_turbo_fn
|
| 167 |
|
| 168 |
+
Pivot: chatgpt_3_5_turbo
|
| 169 |
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
| 170 |
|-----|-----|-----|-----|-----|
|
| 171 |
|chatgpt_4_turbo | 76.04 | 90.00 | 1.46 | 1270 |
|
|
|
|
| 178 |
|saiga_llama3_8b, v4 | 43.64 | 65.90 | 2.31 | 1200 |
|
| 179 |
|saiga_llama3_8b, v3 | 36.97 | 61.08 | 2.38 | 1162 |
|
| 180 |
|saiga_llama3_8b, v2 | 33.07 | 48.19 | 2.45 | 1166 |
|
| 181 |
+
|saiga_mistral_7b | 23.38 | 35.99 | 2.34 | 949 |
|
| 182 |
+
|
| 183 |
+
Pivot: sfr
|
| 184 |
+
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
| 185 |
+
|-----|-----|-----|-----|-----|
|
| 186 |
+
| sfr | 50.00 | 50.00 | 0.00 | 1215 |
|
| 187 |
+
| saiga_llama3_8b, v7 | 48.95 | 49.16 | 2.46 | 1266 |
|
| 188 |
+
| saiga_llama3_8b, v6 | 46.91 | 47.23 | 2.45 | 1262 |
|
| 189 |
+
| suzume_8b | 43.69 | 48.19 | 2.46 | 1325 |
|