Update README.md
Browse files
README.md
CHANGED
@@ -151,44 +151,44 @@ lm_eval \
|
|
151 |
<td rowspan="7"><b>OpenLLM V1</b></td>
|
152 |
<td>ARC Challenge</td>
|
153 |
<td>68.43%</td>
|
154 |
-
<td
|
155 |
-
<td
|
156 |
</tr>
|
157 |
<tr>
|
158 |
<td>GSM8K</td>
|
159 |
<td>88.10%</td>
|
160 |
-
<td
|
161 |
-
<td
|
162 |
</tr>
|
163 |
<tr>
|
164 |
<td>HellaSwag</td>
|
165 |
<td>83.76%</td>
|
166 |
-
<td
|
167 |
-
<td
|
168 |
</tr>
|
169 |
<tr>
|
170 |
<td>MMLU</td>
|
171 |
<td>72.15%</td>
|
172 |
-
<td
|
173 |
-
<td
|
174 |
</tr>
|
175 |
<tr>
|
176 |
<td>TruthfulQA (mc2)</td>
|
177 |
<td>58.13%</td>
|
178 |
-
<td
|
179 |
-
<td
|
180 |
</tr>
|
181 |
<tr>
|
182 |
<td>WinoGrande</td>
|
183 |
-
<td>79.40
|
184 |
-
<td
|
185 |
-
<td
|
186 |
</tr>
|
187 |
<tr>
|
188 |
<td><b>Average Score</b></td>
|
189 |
<td><b>74.99%</b></td>
|
190 |
-
<td><b
|
191 |
-
<td><b
|
192 |
</tr>
|
193 |
<tr>
|
194 |
<td rowspan="3"><b>Vision Evals</b></td>
|
|
|
151 |
<td rowspan="7"><b>OpenLLM V1</b></td>
|
152 |
<td>ARC Challenge</td>
|
153 |
<td>68.43%</td>
|
154 |
+
<td>68.86%</td>
|
155 |
+
<td>100.62%</td>
|
156 |
</tr>
|
157 |
<tr>
|
158 |
<td>GSM8K</td>
|
159 |
<td>88.10%</td>
|
160 |
+
<td>88.02%</td>
|
161 |
+
<td>99.91%</td>
|
162 |
</tr>
|
163 |
<tr>
|
164 |
<td>HellaSwag</td>
|
165 |
<td>83.76%</td>
|
166 |
+
<td>83.78%</td>
|
167 |
+
<td>100.02%</td>
|
168 |
</tr>
|
169 |
<tr>
|
170 |
<td>MMLU</td>
|
171 |
<td>72.15%</td>
|
172 |
+
<td>71.80%</td>
|
173 |
+
<td>99.51%</td>
|
174 |
</tr>
|
175 |
<tr>
|
176 |
<td>TruthfulQA (mc2)</td>
|
177 |
<td>58.13%</td>
|
178 |
+
<td>59.35%</td>
|
179 |
+
<td>102.09%</td>
|
180 |
</tr>
|
181 |
<tr>
|
182 |
<td>WinoGrande</td>
|
183 |
+
<td>79.40%</td>
|
184 |
+
<td>79.48%</td>
|
185 |
+
<td>100.10%</td>
|
186 |
</tr>
|
187 |
<tr>
|
188 |
<td><b>Average Score</b></td>
|
189 |
<td><b>74.99%</b></td>
|
190 |
+
<td><b>75.21%</b></td>
|
191 |
+
<td><b>100.29%</b></td>
|
192 |
</tr>
|
193 |
<tr>
|
194 |
<td rowspan="3"><b>Vision Evals</b></td>
|