qiyang-zhao committed on
Commit
b932224
1 Parent(s): 038bd78

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -7
README.md CHANGED
@@ -92,37 +92,37 @@ We report in the following table our internal pipeline benchmarks:
92
  <tr>
93
  <td>IFEval</td>
94
  <td>17.91</td>
95
- <td><b>44.5</b></td>
96
  </tr>
97
  <tr>
98
  <td>MUSR</td>
99
  <td>4.87</td>
100
- <td><b>2.78</b></td>
101
  </tr>
102
  <tr>
103
  <td>GPQA</td>
104
  <td>1.83</td>
105
- <td><b>0</b></td>
106
  </tr>
107
  <tr>
108
  <td>BBH</td>
109
- <td><b>5.36</b></td>
110
  <td>2.24</td>
111
  </tr>
112
  <tr>
113
  <td>MMLU-PRO</td>
114
- <td><b>2.78</b></td>
115
  <td>1.93</td>
116
  </tr>
117
  <tr>
118
  <td>MATH</td>
119
  <td>0.26</td>
120
- <td><b>0.17</b></td>
121
  </tr>
122
  <tr>
123
  <td>Average</td>
124
  <td>5.5</td>
125
- <td><b>8.6</b></td>
126
  </tr>
127
  </tbody>
128
  </table>
 
92
  <tr>
93
  <td>IFEval</td>
94
  <td>17.91</td>
95
+ <td>44.5</td>
96
  </tr>
97
  <tr>
98
  <td>MUSR</td>
99
  <td>4.87</td>
100
+ <td>2.78</td>
101
  </tr>
102
  <tr>
103
  <td>GPQA</td>
104
  <td>1.83</td>
105
+ <td>0</td>
106
  </tr>
107
  <tr>
108
  <td>BBH</td>
109
+ <td>5.36</td>
110
  <td>2.24</td>
111
  </tr>
112
  <tr>
113
  <td>MMLU-PRO</td>
114
+ <td>2.78</td>
115
  <td>1.93</td>
116
  </tr>
117
  <tr>
118
  <td>MATH</td>
119
  <td>0.26</td>
120
+ <td>0.17</td>
121
  </tr>
122
  <tr>
123
  <td>Average</td>
124
  <td>5.5</td>
125
+ <td>8.6</td>
126
  </tr>
127
  </tbody>
128
  </table>