qiyang-zhao
committed on
Commit
•
b932224
1
Parent(s):
038bd78
Update README.md
Browse files
README.md
CHANGED
@@ -92,37 +92,37 @@ We report in the following table our internal pipeline benchmarks:
|
|
92 |
<tr>
|
93 |
<td>IFEval</td>
|
94 |
<td>17.91</td>
|
95 |
-
<td
|
96 |
</tr>
|
97 |
<tr>
|
98 |
<td>MUSR</td>
|
99 |
<td>4.87</td>
|
100 |
-
<td
|
101 |
</tr>
|
102 |
<tr>
|
103 |
<td>GPQA</td>
|
104 |
<td>1.83</td>
|
105 |
-
<td
|
106 |
</tr>
|
107 |
<tr>
|
108 |
<td>BBH</td>
|
109 |
-
<td
|
110 |
<td>2.24</td>
|
111 |
</tr>
|
112 |
<tr>
|
113 |
<td>MMLU-PRO</td>
|
114 |
-
<td
|
115 |
<td>1.93</td>
|
116 |
</tr>
|
117 |
<tr>
|
118 |
<td>MATH</td>
|
119 |
<td>0.26</td>
|
120 |
-
<td
|
121 |
</tr>
|
122 |
<tr>
|
123 |
<td>Average</td>
|
124 |
<td>5.5</td>
|
125 |
-
<td
|
126 |
</tr>
|
127 |
</tbody>
|
128 |
</table>
|
|
|
92 |
<tr>
|
93 |
<td>IFEval</td>
|
94 |
<td>17.91</td>
|
95 |
+
<td>44.5</td>
|
96 |
</tr>
|
97 |
<tr>
|
98 |
<td>MUSR</td>
|
99 |
<td>4.87</td>
|
100 |
+
<td>2.78</td>
|
101 |
</tr>
|
102 |
<tr>
|
103 |
<td>GPQA</td>
|
104 |
<td>1.83</td>
|
105 |
+
<td>0</td>
|
106 |
</tr>
|
107 |
<tr>
|
108 |
<td>BBH</td>
|
109 |
+
<td>5.36</td>
|
110 |
<td>2.24</td>
|
111 |
</tr>
|
112 |
<tr>
|
113 |
<td>MMLU-PRO</td>
|
114 |
+
<td>2.78</td>
|
115 |
<td>1.93</td>
|
116 |
</tr>
|
117 |
<tr>
|
118 |
<td>MATH</td>
|
119 |
<td>0.26</td>
|
120 |
+
<td>0.17</td>
|
121 |
</tr>
|
122 |
<tr>
|
123 |
<td>Average</td>
|
124 |
<td>5.5</td>
|
125 |
+
<td>8.6</td>
|
126 |
</tr>
|
127 |
</tbody>
|
128 |
</table>
|