melaseddik committed on
Commit
e29972d
1 Parent(s): bad033a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +115 -2
README.md CHANGED
@@ -167,7 +167,7 @@ print(tokenizer.decode(outputs[0]))
167
  </table>
168
 
169
 
170
- <table border="1" style="width: 100%; text-align: center;">
171
  <colgroup>
172
  <col style="width: 10%;">
173
  <col style="width: 10%;">
@@ -229,11 +229,124 @@ print(tokenizer.decode(outputs[0]))
229
  <td>16.1</td>
230
  <td>36.4</td>
231
  </tr>
232
- <!-- Continue with other rows similarly -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  </tbody>
234
  </table>
235
 
236
 
 
237
  # Citation
238
 
239
 
 
167
  </table>
168
 
169
 
170
+ <table border="1" style="width: 100%; text-align: center; border-collapse: collapse;">
171
  <colgroup>
172
  <col style="width: 10%;">
173
  <col style="width: 10%;">
 
229
  <td>16.1</td>
230
  <td>36.4</td>
231
  </tr>
232
+ <tr>
233
+ <td rowspan="2">Math</td>
234
+ <td>GSM8K (5-shot)</td>
235
+ <td>49.4</td>
236
+ <td>77.9</td>
237
+ <td>82.9</td>
238
+ <td>76.2</td>
239
+ <td>69.1</td>
240
+ <td>63.8</td>
241
+ <td>55.3</td>
242
+ <td>81.4</td>
243
+ </tr>
244
+ <tr>
245
+ <td>MATH (4-shot)</td>
246
+ <td>4.1</td>
247
+ <td>17.5</td>
248
+ <td>15.5</td>
249
+ <td>18.0</td>
250
+ <td>0</td>
251
+ <td>9.2</td>
252
+ <td>4.9</td>
253
+ <td>22.9</td>
254
+ </tr>
255
+ <tr>
256
+ <td rowspan="4">Reasoning</td>
257
+ <td>ARC Challenge (25-shot)</td>
258
+ <td>53.4</td>
259
+ <td>57.4</td>
260
+ <td>59.0</td>
261
+ <td>59.6</td>
262
+ <td>63.7</td>
263
+ <td>58.2</td>
264
+ <td>60.6</td>
265
+ <td>62.6</td>
266
+ </tr>
267
+ <tr>
268
+ <td>GPQA (0-shot)</td>
269
+ <td>31.0</td>
270
+ <td>31.9</td>
271
+ <td>33.0</td>
272
+ <td>35.5</td>
273
+ <td>0</td>
274
+ <td>36.6</td>
275
+ <td>28.8</td>
276
+ <td>34.1</td>
277
+ </tr>
278
+ <tr>
279
+ <td>MUSR (0-shot)</td>
280
+ <td>38.0</td>
281
+ <td>44.1</td>
282
+ <td>44.2</td>
283
+ <td>47.3</td>
284
+ <td>0</td>
285
+ <td>43.3</td>
286
+ <td>39.2</td>
287
+ <td>44.2</td>
288
+ </tr>
289
+ <tr>
290
+ <td>BBH (3-shot)</td>
291
+ <td>46.5</td>
292
+ <td>53.3</td>
293
+ <td>54.0</td>
294
+ <td>51.0</td>
295
+ <td>0</td>
296
+ <td>51.3</td>
297
+ <td>50.2</td>
298
+ <td>59.7</td>
299
+ </tr>
300
+ <tr>
301
+ <td rowspan="4">CommonSense Understanding</td>
302
+ <td>PIQA (0-shot)</td>
303
+ <td>80.3</td>
304
+ <td>79.8</td>
305
+ <td>78.7</td>
306
+ <td>77.7</td>
307
+ <td>81.4</td>
308
+ <td>79.8</td>
309
+ <td>81.4</td>
310
+ <td>79.1</td>
311
+ </tr>
312
+ <tr>
313
+ <td>SciQ (0-shot)</td>
314
+ <td>96.3</td>
315
+ <td>95.9</td>
316
+ <td>96.6</td>
317
+ <td>95.3</td>
318
+ <td>97.2</td>
319
+ <td>95.8</td>
320
+ <td>96.4</td>
321
+ <td>96.0</td>
322
+ </tr>
323
+ <tr>
324
+ <td>Winogrande (0-shot)</td>
325
+ <td>74.0</td>
326
+ <td>72.1</td>
327
+ <td>72.9</td>
328
+ <td>71.0</td>
329
+ <td>74.2</td>
330
+ <td>72.7</td>
331
+ <td>73.2</td>
332
+ <td>73.6</td>
333
+ </tr>
334
+ <tr>
335
+ <td>OpenbookQA (0-shot)</td>
336
+ <td>33.4</td>
337
+ <td>35.2</td>
338
+ <td>33.6</td>
339
+ <td>31.4</td>
340
+ <td>34.0</td>
341
+ <td>35.4</td>
342
+ <td>36.4</td>
343
+ <td>34.0</td>
344
+ </tr>
345
  </tbody>
346
  </table>
347
 
348
 
349
+
350
  # Citation
351
 
352