nm-research committed on
Commit
0058be4
·
verified ·
1 Parent(s): 8176de1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -15
README.md CHANGED
@@ -151,44 +151,44 @@ lm_eval \
151
  <td rowspan="7"><b>OpenLLM V1</b></td>
152
  <td>ARC Challenge</td>
153
  <td>68.43%</td>
154
- <td>%</td>
155
- <td>%</td>
156
  </tr>
157
  <tr>
158
  <td>GSM8K</td>
159
  <td>88.10%</td>
160
- <td>%</td>
161
- <td>%</td>
162
  </tr>
163
  <tr>
164
  <td>Hellaswag</td>
165
  <td>83.76%</td>
166
- <td>%</td>
167
- <td>%</td>
168
  </tr>
169
  <tr>
170
  <td>MMLU</td>
171
  <td>72.15%</td>
172
- <td>%</td>
173
- <td>%</td>
174
  </tr>
175
  <tr>
176
  <td>Truthfulqa (mc2)</td>
177
  <td>58.13%</td>
178
- <td>%</td>
179
- <td>%</td>
180
  </tr>
181
  <tr>
182
  <td>Winogrande</td>
183
- <td>79.40%%</td>
184
- <td>%</td>
185
- <td>%</td>
186
  </tr>
187
  <tr>
188
  <td><b>Average Score</b></td>
189
  <td><b>74.99%</b></td>
190
- <td><b>%</b></td>
191
- <td><b>%</b></td>
192
  </tr>
193
  <tr>
194
  <td rowspan="3"><b>Vision Evals</b></td>
 
151
  <td rowspan="7"><b>OpenLLM V1</b></td>
152
  <td>ARC Challenge</td>
153
  <td>68.43%</td>
154
+ <td>68.86%</td>
155
+ <td>100.62%</td>
156
  </tr>
157
  <tr>
158
  <td>GSM8K</td>
159
  <td>88.10%</td>
160
+ <td>88.02%</td>
161
+ <td>99.91%</td>
162
  </tr>
163
  <tr>
164
  <td>Hellaswag</td>
165
  <td>83.76%</td>
166
+ <td>83.78%</td>
167
+ <td>100.02%</td>
168
  </tr>
169
  <tr>
170
  <td>MMLU</td>
171
  <td>72.15%</td>
172
+ <td>71.80%</td>
173
+ <td>99.51%</td>
174
  </tr>
175
  <tr>
176
  <td>Truthfulqa (mc2)</td>
177
  <td>58.13%</td>
178
+ <td>59.35%</td>
179
+ <td>102.09%</td>
180
  </tr>
181
  <tr>
182
  <td>Winogrande</td>
183
+ <td>79.40%</td>
184
+ <td>79.48%</td>
185
+ <td>100.10%</td>
186
  </tr>
187
  <tr>
188
  <td><b>Average Score</b></td>
189
  <td><b>74.99%</b></td>
190
+ <td><b>75.21%</b></td>
191
+ <td><b>100.29%</b></td>
192
  </tr>
193
  <tr>
194
  <td rowspan="3"><b>Vision Evals</b></td>