Update README.md
Browse files
README.md
CHANGED
@@ -115,24 +115,24 @@ We report in the following table our internal pipeline benchmarks:
|
|
115 |
<tbody>
|
116 |
<tr>
|
117 |
<td>MMLU</td>
|
118 |
-
<td>67.
|
119 |
-
<td>67.
|
120 |
-
<td>66.
|
121 |
-
<td>65.
|
122 |
</tr>
|
123 |
<tr>
|
124 |
<td>MMLU-PRO</td>
|
125 |
-
<td>40.
|
126 |
-
<td>40.
|
127 |
-
<td>39.
|
128 |
-
<td>39.
|
129 |
</tr>
|
130 |
<tr>
|
131 |
<td>IFEval</td>
|
132 |
-
<td>75.
|
133 |
-
<td>
|
134 |
-
<td>74.
|
135 |
-
<td>72.
|
136 |
</tr>
|
137 |
</tbody>
|
138 |
</table>
|
|
|
115 |
<tbody>
|
116 |
<tr>
|
117 |
<td>MMLU</td>
|
118 |
+
<td>67.7</td>
|
119 |
+
<td>67.6</td>
|
120 |
+
<td>66.4</td>
|
121 |
+
<td>65.6</td>
|
122 |
</tr>
|
123 |
<tr>
|
124 |
<td>MMLU-PRO</td>
|
125 |
+
<td>40.9</td>
|
126 |
+
<td>40.9</td>
|
127 |
+
<td>39.9</td>
|
128 |
+
<td>39.1</td>
|
129 |
</tr>
|
130 |
<tr>
|
131 |
<td>IFEval</td>
|
132 |
+
<td>75.1</td>
|
133 |
+
<td>77.0</td>
|
134 |
+
<td>74.8</td>
|
135 |
+
<td>72.2</td>
|
136 |
</tr>
|
137 |
</tbody>
|
138 |
</table>
|