ranarag committed on
Commit
0edf012
·
verified ·
1 Parent(s): d558429

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -86
README.md CHANGED
@@ -61,9 +61,9 @@ print(output)
61
  ```
62
 
63
  **Evaluation Results:**
64
- <!-- <todo>Add results</todo>
65
  <table>
66
- <caption><b>HuggingFace Open LLM Leaderboard V1</b></caption>
67
  <thead>
68
  <tr>
69
  <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
@@ -73,109 +73,73 @@ print(output)
73
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
74
  <th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th>
75
  <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
 
 
 
 
76
  <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
77
  </tr></thead>
78
  <tbody>
79
  <tr>
80
  <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
81
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.58</td>
82
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">77.67</td>
83
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.86</td>
84
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.02</td>
85
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.84</td>
86
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.99</td>
87
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.32</td>
 
 
 
 
88
  </tr>
89
  <tr>
90
  <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.3-2B-Base</td>
91
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
92
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
93
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
94
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
95
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
96
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
97
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
 
 
 
 
98
  </tr>
99
  <tr>
100
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td>
101
- <td style="text-align:center; background-color: #DAE8FF; color: black;">63.99</td>
102
- <td style="text-align:center; background-color: #DAE8FF; color: black;">83.27</td>
103
- <td style="text-align:center; background-color: #DAE8FF; color: black;">63.45</td>
104
- <td style="text-align:center; background-color: #DAE8FF; color: black;">51.29</td>
105
- <td style="text-align:center; background-color: #DAE8FF; color: black;">78.92</td>
106
- <td style="text-align:center; background-color: #DAE8FF; color: black;">60.19</td>
107
- <td style="text-align:center; background-color: #DAE8FF; color: black;">66.85</td>
 
 
 
 
108
  </tr>
109
 
110
  <tr>
111
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.3-8B-Base</td>
112
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
113
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
114
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
115
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
116
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
117
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
118
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
 
 
 
 
119
  </tr>
120
 
121
  </tbody></table>
122
 
123
- <table>
124
- <caption><b>HuggingFace Open LLM Leaderboard V2</b></caption>
125
- <thead>
126
- <tr>
127
- <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
128
- <th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th>
129
- <th style="text-align:center; background-color: #001d6c; color: white;">BBH</th>
130
- <th style="text-align:center; background-color: #001d6c; color: white;">MATH Lvl 5</th>
131
- <th style="text-align:center; background-color: #001d6c; color: white;">GPQA</th>
132
- <th style="text-align:center; background-color: #001d6c; color: white;">MUSR</th>
133
- <th style="text-align:center; background-color: #001d6c; color: white;">MMLU-Pro</th>
134
- <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
135
- <tbody>
136
- <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
137
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">35.22</td>
138
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">16.84</td>
139
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.59</td>
140
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.69</td>
141
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.9</td>
142
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.9</td>
143
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.19</td>
144
- </tr>
145
- <tr>
146
- <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
147
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
148
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
149
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
150
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
151
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
152
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
153
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> - </td>
154
- </tr>
155
- <tr>
156
- <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td>
157
- <td style="text-align:center; background-color: #DAE8FF; color: black;">42.21</td>
158
- <td style="text-align:center; background-color: #DAE8FF; color: black;">26.02</td>
159
- <td style="text-align:center; background-color: #DAE8FF; color: black;">9.52</td>
160
- <td style="text-align:center; background-color: #DAE8FF; color: black;">9.51</td>
161
- <td style="text-align:center; background-color: #DAE8FF; color: black;">8.36</td>
162
- <td style="text-align:center; background-color: #DAE8FF; color: black;">24.8</td>
163
- <td style="text-align:center; background-color: #DAE8FF; color: black;">20.07</td>
164
- </tr>
165
- <tr>
166
- <tr>
167
- <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.3-8B-Base</td>
168
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
169
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
170
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
171
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
172
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
173
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
174
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> - </td>
175
- </tr>
176
-
177
- </tbody></table> -->
178
-
179
  **Model Architecture:**
180
  Granite-3.3-2B-Base is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings.
181
  <table>
 
61
  ```
62
 
63
  **Evaluation Results:**
64
+
65
  <table>
66
+ <caption><b>Comparison with 3.1 Base models</b></caption>
67
  <thead>
68
  <tr>
69
  <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
 
73
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
74
  <th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th>
75
  <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
76
+ <th style="text-align:center; background-color: #001d6c; color: white;">DROP</th>
77
+ <th style="text-align:center; background-color: #001d6c; color: white;">NQ</th>
78
+ <th style="text-align:center; background-color: #001d6c; color: white;">AGIEval</th>
79
+ <th style="text-align:center; background-color: #001d6c; color: white;">TriviaQA</th>
80
  <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
81
  </tr></thead>
82
  <tbody>
83
  <tr>
84
  <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
85
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.83</td>
86
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">74.9</td>
87
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.87</td>
88
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">38.93</td>
89
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">71.8</td>
90
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.0</td>
91
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">30.08</td>
92
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">24.46</td>
93
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">38.24</td>
94
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">63.18</td>
95
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">49.63</td>
96
  </tr>
97
  <tr>
98
  <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.3-2B-Base</td>
99
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 47.49 </td>
100
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 73.2 </td>
101
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 54.33 </td>
102
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 40.83 </td>
103
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 70.4 </td>
104
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 50.0 </td>
105
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;"> 32.552 </td>
106
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">24.36</td>
107
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">38.78</td>
108
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">63.22</td>
109
+ <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">49.52</td>
110
  </tr>
111
  <tr>
112
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td>
113
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">53.51</td>
114
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">81.4</td>
115
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">64.28</td>
116
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">51.27</td>
117
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">76.2</td>
118
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">70.5</td>
119
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">45.87</td>
120
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">35.97</td>
121
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">48.99</td>
122
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">78.33</td>
123
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">60.63</td>
124
  </tr>
125
 
126
  <tr>
127
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.3-8B-Base</td>
128
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">50.84</td>
129
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">80.1</td>
130
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">63.89</td>
131
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">52.15</td>
132
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">74.4</td>
133
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">59.0</td>
134
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">36.14</td>
135
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">36.5</td>
136
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">49.3</td>
137
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">78.18</td>
138
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">58.05</td>
139
  </tr>
140
 
141
  </tbody></table>
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  **Model Architecture:**
144
  Granite-3.3-2B-Base is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings.
145
  <table>