Atlas-Pro-Evals / index.html
Spestly's picture
Update index.html
5dd79ce verified
<style type="text/css">
/* Layout for the data cells: an over-long cell value may be broken mid-word
   instead of overflowing, and each cell's max-width is capped at 1px.
   NOTE(review): the 1px cap presumably keeps cell content from widening its
   column, so that the heading percentages below decide the column widths;
   confirm against the tool that generated this table. */
#T_7aef5 td {
overflow-wrap: break-word;
max-width: 1px;
}
/* Each column heading requests 14.285714285714286% of the table width, i.e.
   100% / 7 for the seven model columns (col0 to col6). */
#T_7aef5 .col_heading {
width: 14.285714285714286%;
}
/* Per-cell colours. Every rule from here to the end of this style block lists
   the ids of the cells that share one background/text colour pair. The ids
   have the form T_7aef5_row<R>_col<C> and match the id attributes on the <td>
   elements of the table below. Text is #000000 or #f1f1f1 depending on the
   background. Examples visible in this file: the cell holding 0.000000
   (row111_col3) is #8e0152, 0.500000 (row19_col3) is #f7f7f6 and 0.884000
   (row19_col1) is #549825.
   NOTE(review): the pink-to-green ramp looks like matplotlib's PiYG colormap
   applied on a fixed 0 to 1 scale; confirm against the generator. */
#T_7aef5_row15_col0, #T_7aef5_row105_col2, #T_7aef5_row118_col1 {
background-color: #f7cbe4;
color: #000000;
}
#T_7aef5_row15_col1, #T_7aef5_row31_col4 {
background-color: #f9eff4;
color: #000000;
}
#T_7aef5_row15_col2, #T_7aef5_row34_col4 {
background-color: #fbe6f1;
color: #000000;
}
#T_7aef5_row15_col3, #T_7aef5_row58_col4, #T_7aef5_row85_col6, #T_7aef5_row109_col0 {
background-color: #f4bfdf;
color: #000000;
}
#T_7aef5_row15_col4, #T_7aef5_row28_col3, #T_7aef5_row46_col3, #T_7aef5_row85_col0, #T_7aef5_row107_col4 {
background-color: #f6c7e3;
color: #000000;
}
#T_7aef5_row15_col5, #T_7aef5_row15_col6, #T_7aef5_row31_col3, #T_7aef5_row40_col1, #T_7aef5_row52_col2 {
background-color: #f9eef4;
color: #000000;
}
#T_7aef5_row19_col0 {
background-color: #a9d874;
color: #000000;
}
#T_7aef5_row19_col1 {
background-color: #549825;
color: #f1f1f1;
}
#T_7aef5_row19_col2 {
background-color: #cdeaa7;
color: #000000;
}
#T_7aef5_row19_col3, #T_7aef5_row70_col0, #T_7aef5_row115_col0 {
background-color: #f7f7f6;
color: #000000;
}
#T_7aef5_row19_col4, #T_7aef5_row88_col6 {
background-color: #f1f6ea;
color: #000000;
}
#T_7aef5_row19_col5 {
background-color: #7fbc41;
color: #000000;
}
#T_7aef5_row19_col6 {
background-color: #8fc654;
color: #000000;
}
#T_7aef5_row22_col0, #T_7aef5_row52_col5 {
background-color: #f5f7f3;
color: #000000;
}
#T_7aef5_row22_col1, #T_7aef5_row31_col5 {
background-color: #edf6df;
color: #000000;
}
#T_7aef5_row22_col2 {
background-color: #ebf6db;
color: #000000;
}
#T_7aef5_row22_col3, #T_7aef5_row22_col4, #T_7aef5_row37_col3, #T_7aef5_row70_col1 {
background-color: #f4f7f0;
color: #000000;
}
#T_7aef5_row22_col5 {
background-color: #eaf5d9;
color: #000000;
}
#T_7aef5_row22_col6, #T_7aef5_row37_col5 {
background-color: #d8efb9;
color: #000000;
}
#T_7aef5_row25_col0, #T_7aef5_row100_col3, #T_7aef5_row145_col1 {
background-color: #eeabd2;
color: #000000;
}
#T_7aef5_row25_col1, #T_7aef5_row49_col6 {
background-color: #eff6e4;
color: #000000;
}
#T_7aef5_row25_col2 {
background-color: #fce3f0;
color: #000000;
}
#T_7aef5_row25_col3, #T_7aef5_row40_col3 {
background-color: #df7cb1;
color: #f1f1f1;
}
#T_7aef5_row25_col4 {
background-color: #fbd9ec;
color: #000000;
}
#T_7aef5_row25_col5, #T_7aef5_row70_col4, #T_7aef5_row73_col4 {
background-color: #f8f2f5;
color: #000000;
}
#T_7aef5_row25_col6, #T_7aef5_row61_col6 {
background-color: #faecf3;
color: #000000;
}
#T_7aef5_row28_col0, #T_7aef5_row130_col0 {
background-color: #fbe7f2;
color: #000000;
}
#T_7aef5_row28_col1, #T_7aef5_row73_col1 {
background-color: #ecf6de;
color: #000000;
}
#T_7aef5_row28_col2, #T_7aef5_row64_col6, #T_7aef5_row70_col6 {
background-color: #e2f3ca;
color: #000000;
}
#T_7aef5_row28_col4 {
background-color: #f2badc;
color: #000000;
}
#T_7aef5_row28_col5, #T_7aef5_row73_col3, #T_7aef5_row142_col2 {
background-color: #f3f6ed;
color: #000000;
}
#T_7aef5_row28_col6, #T_7aef5_row100_col0, #T_7aef5_row145_col2 {
background-color: #efb0d6;
color: #000000;
}
#T_7aef5_row31_col0, #T_7aef5_row105_col4 {
background-color: #f9f1f5;
color: #000000;
}
#T_7aef5_row31_col1, #T_7aef5_row37_col1 {
background-color: #e8f5d5;
color: #000000;
}
#T_7aef5_row31_col2, #T_7aef5_row70_col3, #T_7aef5_row73_col0, #T_7aef5_row142_col5 {
background-color: #f0f6e7;
color: #000000;
}
#T_7aef5_row31_col6, #T_7aef5_row37_col4, #T_7aef5_row46_col4, #T_7aef5_row52_col4 {
background-color: #f8f3f6;
color: #000000;
}
#T_7aef5_row34_col0, #T_7aef5_row43_col0, #T_7aef5_row124_col1 {
background-color: #d24c97;
color: #f1f1f1;
}
#T_7aef5_row34_col1 {
background-color: #faeaf2;
color: #000000;
}
#T_7aef5_row34_col2, #T_7aef5_row55_col1 {
background-color: #fad6ea;
color: #000000;
}
#T_7aef5_row34_col3, #T_7aef5_row67_col4 {
background-color: #e283b7;
color: #f1f1f1;
}
#T_7aef5_row34_col5, #T_7aef5_row55_col0, #T_7aef5_row90_col1 {
background-color: #f5c6e2;
color: #000000;
}
#T_7aef5_row34_col6, #T_7aef5_row109_col2, #T_7aef5_row148_col2 {
background-color: #f9d3e8;
color: #000000;
}
#T_7aef5_row37_col0, #T_7aef5_row49_col0, #T_7aef5_row55_col5, #T_7aef5_row88_col2 {
background-color: #f7f6f7;
color: #000000;
}
#T_7aef5_row37_col2, #T_7aef5_row46_col6, #T_7aef5_row52_col0, #T_7aef5_row64_col5, #T_7aef5_row142_col0, #T_7aef5_row142_col6 {
background-color: #f6f7f5;
color: #000000;
}
#T_7aef5_row37_col6 {
background-color: #c6e79c;
color: #000000;
}
#T_7aef5_row40_col0, #T_7aef5_row103_col2, #T_7aef5_row145_col6 {
background-color: #e388ba;
color: #f1f1f1;
}
#T_7aef5_row40_col2, #T_7aef5_row76_col3, #T_7aef5_row97_col3, #T_7aef5_row145_col0 {
background-color: #e590bf;
color: #f1f1f1;
}
#T_7aef5_row40_col4 {
background-color: #eeadd4;
color: #000000;
}
#T_7aef5_row40_col5, #T_7aef5_row55_col3 {
background-color: #f8cee6;
color: #000000;
}
#T_7aef5_row40_col6 {
background-color: #fbe8f2;
color: #000000;
}
#T_7aef5_row43_col1 {
background-color: #f6c9e3;
color: #000000;
}
#T_7aef5_row43_col2, #T_7aef5_row61_col4, #T_7aef5_row82_col2 {
background-color: #d861a2;
color: #f1f1f1;
}
#T_7aef5_row43_col3, #T_7aef5_row67_col3, #T_7aef5_row79_col1, #T_7aef5_row82_col6 {
background-color: #cf4191;
color: #f1f1f1;
}
#T_7aef5_row43_col4, #T_7aef5_row79_col2, #T_7aef5_row79_col5 {
background-color: #d34f99;
color: #f1f1f1;
}
#T_7aef5_row43_col5, #T_7aef5_row130_col6, #T_7aef5_row136_col5, #T_7aef5_row148_col6 {
background-color: #f4c1df;
color: #000000;
}
#T_7aef5_row43_col6, #T_7aef5_row58_col6, #T_7aef5_row111_col1, #T_7aef5_row138_col1 {
background-color: #fcdbed;
color: #000000;
}
#T_7aef5_row46_col0 {
background-color: #fde2f0;
color: #000000;
}
#T_7aef5_row46_col1 {
background-color: #bbe28a;
color: #000000;
}
#T_7aef5_row46_col2 {
background-color: #fde0ef;
color: #000000;
}
#T_7aef5_row46_col5 {
background-color: #f5f7f2;
color: #000000;
}
#T_7aef5_row49_col1 {
background-color: #e9f5d6;
color: #000000;
}
#T_7aef5_row49_col2 {
background-color: #81bd44;
color: #000000;
}
#T_7aef5_row49_col3, #T_7aef5_row67_col1, #T_7aef5_row148_col0 {
background-color: #ea9fca;
color: #000000;
}
#T_7aef5_row49_col4, #T_7aef5_row90_col5, #T_7aef5_row97_col6 {
background-color: #f0b2d7;
color: #000000;
}
#T_7aef5_row49_col5 {
background-color: #83bf46;
color: #000000;
}
#T_7aef5_row52_col1 {
background-color: #c0e593;
color: #000000;
}
#T_7aef5_row52_col3, #T_7aef5_row58_col1, #T_7aef5_row58_col2, #T_7aef5_row64_col1, #T_7aef5_row109_col3 {
background-color: #fce5f1;
color: #000000;
}
#T_7aef5_row52_col6 {
background-color: #e1f3c7;
color: #000000;
}
#T_7aef5_row55_col2, #T_7aef5_row97_col0, #T_7aef5_row107_col1 {
background-color: #f3bcdd;
color: #000000;
}
#T_7aef5_row55_col4 {
background-color: #c82884;
color: #f1f1f1;
}
#T_7aef5_row55_col6, #T_7aef5_row138_col3 {
background-color: #f8d0e7;
color: #000000;
}
#T_7aef5_row58_col0, #T_7aef5_row121_col1 {
background-color: #e897c4;
color: #000000;
}
#T_7aef5_row58_col3, #T_7aef5_row79_col0, #T_7aef5_row107_col0 {
background-color: #e07eb3;
color: #f1f1f1;
}
#T_7aef5_row58_col5, #T_7aef5_row64_col2, #T_7aef5_row109_col1 {
background-color: #fbe9f2;
color: #000000;
}
#T_7aef5_row61_col0, #T_7aef5_row67_col2, #T_7aef5_row100_col2, #T_7aef5_row100_col6 {
background-color: #eba3cd;
color: #000000;
}
#T_7aef5_row61_col1, #T_7aef5_row88_col0, #T_7aef5_row88_col1, #T_7aef5_row88_col3, #T_7aef5_row88_col4, #T_7aef5_row88_col5, #T_7aef5_row109_col4 {
background-color: #f8f4f6;
color: #000000;
}
#T_7aef5_row61_col2, #T_7aef5_row97_col5, #T_7aef5_row100_col5 {
background-color: #f2b8db;
color: #000000;
}
#T_7aef5_row61_col3, #T_7aef5_row136_col0 {
background-color: #dc70aa;
color: #f1f1f1;
}
#T_7aef5_row61_col5, #T_7aef5_row67_col6, #T_7aef5_row148_col1 {
background-color: #fad4e9;
color: #000000;
}
#T_7aef5_row64_col0, #T_7aef5_row145_col4 {
background-color: #e181b5;
color: #f1f1f1;
}
#T_7aef5_row64_col3, #T_7aef5_row94_col4, #T_7aef5_row136_col4 {
background-color: #e48bbc;
color: #f1f1f1;
}
#T_7aef5_row64_col4, #T_7aef5_row138_col4, #T_7aef5_row148_col3 {
background-color: #f9d1e8;
color: #000000;
}
#T_7aef5_row67_col0, #T_7aef5_row76_col1, #T_7aef5_row118_col6 {
background-color: #cc368b;
color: #f1f1f1;
}
#T_7aef5_row67_col5 {
background-color: #fddeee;
color: #000000;
}
#T_7aef5_row70_col2 {
background-color: #cbe9a4;
color: #000000;
}
#T_7aef5_row70_col5 {
background-color: #d6eeb6;
color: #000000;
}
#T_7aef5_row73_col2 {
background-color: #a1d26a;
color: #000000;
}
#T_7aef5_row73_col5 {
background-color: #9acd61;
color: #000000;
}
#T_7aef5_row73_col6 {
background-color: #b2dd7f;
color: #000000;
}
#T_7aef5_row76_col0, #T_7aef5_row133_col0 {
background-color: #b51370;
color: #f1f1f1;
}
#T_7aef5_row76_col2, #T_7aef5_row90_col3, #T_7aef5_row94_col6, #T_7aef5_row148_col5 {
background-color: #e89ac6;
color: #000000;
}
#T_7aef5_row76_col4, #T_7aef5_row94_col5, #T_7aef5_row107_col2 {
background-color: #e692c1;
color: #000000;
}
#T_7aef5_row76_col5, #T_7aef5_row127_col6 {
background-color: #c2197a;
color: #f1f1f1;
}
#T_7aef5_row76_col6, #T_7aef5_row111_col0 {
background-color: #e99cc8;
color: #000000;
}
#T_7aef5_row79_col3 {
background-color: #e286b8;
color: #f1f1f1;
}
#T_7aef5_row79_col4 {
background-color: #c72482;
color: #f1f1f1;
}
#T_7aef5_row79_col6, #T_7aef5_row82_col3 {
background-color: #d14895;
color: #f1f1f1;
}
#T_7aef5_row82_col0, #T_7aef5_row111_col6 {
background-color: #d65a9f;
color: #f1f1f1;
}
#T_7aef5_row82_col1, #T_7aef5_row82_col4, #T_7aef5_row136_col3 {
background-color: #c92b86;
color: #f1f1f1;
}
#T_7aef5_row82_col5, #T_7aef5_row121_col0 {
background-color: #ca2f88;
color: #f1f1f1;
}
#T_7aef5_row85_col1, #T_7aef5_row94_col1, #T_7aef5_row136_col6 {
background-color: #f3bdde;
color: #000000;
}
#T_7aef5_row85_col2, #T_7aef5_row97_col1, #T_7aef5_row115_col6, #T_7aef5_row138_col6, #T_7aef5_row145_col5 {
background-color: #f7cce5;
color: #000000;
}
#T_7aef5_row85_col3, #T_7aef5_row85_col5, #T_7aef5_row100_col1 {
background-color: #f5c2e0;
color: #000000;
}
#T_7aef5_row85_col4, #T_7aef5_row100_col4 {
background-color: #f1b7da;
color: #000000;
}
#T_7aef5_row90_col0, #T_7aef5_row105_col0, #T_7aef5_row136_col1, #T_7aef5_row136_col2 {
background-color: #f1b5d9;
color: #000000;
}
#T_7aef5_row90_col2, #T_7aef5_row94_col0, #T_7aef5_row94_col2, #T_7aef5_row97_col2, #T_7aef5_row127_col0 {
background-color: #eba1cb;
color: #000000;
}
#T_7aef5_row90_col4, #T_7aef5_row90_col6, #T_7aef5_row107_col3 {
background-color: #eda8d1;
color: #000000;
}
#T_7aef5_row94_col3, #T_7aef5_row145_col3 {
background-color: #e58dbe;
color: #f1f1f1;
}
#T_7aef5_row97_col4, #T_7aef5_row103_col3 {
background-color: #eca6cf;
color: #000000;
}
#T_7aef5_row103_col0 {
background-color: #db6ca8;
color: #f1f1f1;
}
#T_7aef5_row103_col1 {
background-color: #e795c3;
color: #000000;
}
#T_7aef5_row103_col4, #T_7aef5_row138_col0 {
background-color: #f5c4e1;
color: #000000;
}
#T_7aef5_row103_col5 {
background-color: #b91574;
color: #f1f1f1;
}
#T_7aef5_row103_col6 {
background-color: #b9e187;
color: #000000;
}
#T_7aef5_row105_col1, #T_7aef5_row138_col5 {
background-color: #fbd8eb;
color: #000000;
}
#T_7aef5_row105_col3, #T_7aef5_row127_col1 {
background-color: #fce4f0;
color: #000000;
}
#T_7aef5_row105_col5 {
background-color: #d75ea1;
color: #f1f1f1;
}
#T_7aef5_row105_col6 {
background-color: #88c24c;
color: #000000;
}
#T_7aef5_row107_col5 {
background-color: #c01879;
color: #f1f1f1;
}
#T_7aef5_row107_col6 {
background-color: #95cb5c;
color: #000000;
}
#T_7aef5_row109_col5 {
background-color: #d965a4;
color: #f1f1f1;
}
#T_7aef5_row109_col6 {
background-color: #71b038;
color: #f1f1f1;
}
#T_7aef5_row111_col2, #T_7aef5_row118_col5 {
background-color: #970559;
color: #f1f1f1;
}
#T_7aef5_row111_col3, #T_7aef5_row111_col4, #T_7aef5_row115_col3, #T_7aef5_row115_col4, #T_7aef5_row118_col3, #T_7aef5_row118_col4, #T_7aef5_row121_col3, #T_7aef5_row121_col4, #T_7aef5_row124_col3, #T_7aef5_row124_col4, #T_7aef5_row127_col3, #T_7aef5_row127_col4, #T_7aef5_row130_col3, #T_7aef5_row130_col4, #T_7aef5_row133_col3, #T_7aef5_row133_col4 {
background-color: #8e0152;
color: #f1f1f1;
}
#T_7aef5_row111_col5 {
background-color: #aa0e68;
color: #f1f1f1;
}
#T_7aef5_row115_col1 {
background-color: #ddf1c1;
color: #000000;
}
#T_7aef5_row115_col2, #T_7aef5_row118_col2 {
background-color: #9b075c;
color: #f1f1f1;
}
#T_7aef5_row115_col5 {
background-color: #c41a7c;
color: #f1f1f1;
}
#T_7aef5_row118_col0 {
background-color: #d4539b;
color: #f1f1f1;
}
#T_7aef5_row121_col2, #T_7aef5_row124_col6 {
background-color: #9d085e;
color: #f1f1f1;
}
#T_7aef5_row121_col5, #T_7aef5_row133_col5 {
background-color: #99065a;
color: #f1f1f1;
}
#T_7aef5_row121_col6 {
background-color: #bb1675;
color: #f1f1f1;
}
#T_7aef5_row124_col0 {
background-color: #b1116d;
color: #f1f1f1;
}
#T_7aef5_row124_col2, #T_7aef5_row124_col5, #T_7aef5_row130_col2, #T_7aef5_row133_col2 {
background-color: #900254;
color: #f1f1f1;
}
#T_7aef5_row127_col2 {
background-color: #940457;
color: #f1f1f1;
}
#T_7aef5_row127_col5, #T_7aef5_row133_col6 {
background-color: #a60c65;
color: #f1f1f1;
}
#T_7aef5_row130_col1 {
background-color: #d0ecad;
color: #000000;
}
#T_7aef5_row130_col5 {
background-color: #c51d7e;
color: #f1f1f1;
}
#T_7aef5_row133_col1 {
background-color: #cb3289;
color: #f1f1f1;
}
#T_7aef5_row138_col2, #T_7aef5_row148_col4 {
background-color: #fcdded;
color: #000000;
}
#T_7aef5_row142_col1 {
background-color: #f3f7ef;
color: #000000;
}
#T_7aef5_row142_col3, #T_7aef5_row142_col4 {
background-color: #f8f5f6;
color: #000000;
}
</style>
<table id="T_7aef5">
<thead>
<!-- Column header row: a blank corner cell above the row labels, followed by
     one header cell per model column (col0 to col6). scope="col" states
     explicitly that each model name heads the data cells beneath it, so
     assistive technology does not have to infer the association. The ids and
     classes are unchanged, so the #T_7aef5 .col_heading width rule still
     applies. -->
<tr>
<th class="blank level0" >&nbsp;</th>
<th id="T_7aef5_level0_col0" class="col_heading level0 col0" scope="col" >Spestly/Atlas-Pro-1.5B-Preview</th>
<th id="T_7aef5_level0_col1" class="col_heading level0 col1" scope="col" >Spestly/Atlas-Pro-7B-Preview</th>
<th id="T_7aef5_level0_col2" class="col_heading level0 col2" scope="col" >01-ai/Yi-6B</th>
<th id="T_7aef5_level0_col3" class="col_heading level0 col3" scope="col" >deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</th>
<th id="T_7aef5_level0_col4" class="col_heading level0 col4" scope="col" >deepseek-ai/DeepSeek-R1-Distill-Qwen-7B</th>
<th id="T_7aef5_level0_col5" class="col_heading level0 col5" scope="col" >meta-llama/Meta-Llama-3.1-8B</th>
<th id="T_7aef5_level0_col6" class="col_heading level0 col6" scope="col" >meta-llama/Llama-3.2-3B-Instruct</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T_7aef5_level0_row15" class="row_heading level0 row15" >bbh.acc_norm</th>
<td id="T_7aef5_row15_col0" class="data row15 col0" >0.348030</td>
<td id="T_7aef5_row15_col1" class="data row15 col1" >0.465891</td>
<td id="T_7aef5_row15_col2" class="data row15 col2" >0.426662</td>
<td id="T_7aef5_row15_col3" class="data row15 col3" >0.321298</td>
<td id="T_7aef5_row15_col4" class="data row15 col4" >0.341087</td>
<td id="T_7aef5_row15_col5" class="data row15 col5" >0.463808</td>
<td id="T_7aef5_row15_col6" class="data row15 col6" >0.458601</td>
</tr>
<tr>
<th id="T_7aef5_level0_row19" class="row_heading level0 row19" >bbh_boolean_expressions.acc_norm</th>
<td id="T_7aef5_row19_col0" class="data row19 col0" >0.724000</td>
<td id="T_7aef5_row19_col1" class="data row19 col1" >0.884000</td>
<td id="T_7aef5_row19_col2" class="data row19 col2" >0.656000</td>
<td id="T_7aef5_row19_col3" class="data row19 col3" >0.500000</td>
<td id="T_7aef5_row19_col4" class="data row19 col4" >0.532000</td>
<td id="T_7aef5_row19_col5" class="data row19 col5" >0.800000</td>
<td id="T_7aef5_row19_col6" class="data row19 col6" >0.772000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row22" class="row_heading level0 row22" >bbh_causal_judgement.acc_norm</th>
<td id="T_7aef5_row22_col0" class="data row22 col0" >0.508021</td>
<td id="T_7aef5_row22_col1" class="data row22 col1" >0.561497</td>
<td id="T_7aef5_row22_col2" class="data row22 col2" >0.572193</td>
<td id="T_7aef5_row22_col3" class="data row22 col3" >0.518717</td>
<td id="T_7aef5_row22_col4" class="data row22 col4" >0.518717</td>
<td id="T_7aef5_row22_col5" class="data row22 col5" >0.577540</td>
<td id="T_7aef5_row22_col6" class="data row22 col6" >0.631016</td>
</tr>
<tr>
<th id="T_7aef5_level0_row25" class="row_heading level0 row25" >bbh_date_understanding.acc_norm</th>
<td id="T_7aef5_row25_col0" class="data row25 col0" >0.284000</td>
<td id="T_7aef5_row25_col1" class="data row25 col1" >0.548000</td>
<td id="T_7aef5_row25_col2" class="data row25 col2" >0.412000</td>
<td id="T_7aef5_row25_col3" class="data row25 col3" >0.208000</td>
<td id="T_7aef5_row25_col4" class="data row25 col4" >0.384000</td>
<td id="T_7aef5_row25_col5" class="data row25 col5" >0.480000</td>
<td id="T_7aef5_row25_col6" class="data row25 col6" >0.452000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row28" class="row_heading level0 row28" >bbh_disambiguation_qa.acc_norm</th>
<td id="T_7aef5_row28_col0" class="data row28 col0" >0.432000</td>
<td id="T_7aef5_row28_col1" class="data row28 col1" >0.564000</td>
<td id="T_7aef5_row28_col2" class="data row28 col2" >0.608000</td>
<td id="T_7aef5_row28_col3" class="data row28 col3" >0.340000</td>
<td id="T_7aef5_row28_col4" class="data row28 col4" >0.312000</td>
<td id="T_7aef5_row28_col5" class="data row28 col5" >0.524000</td>
<td id="T_7aef5_row28_col6" class="data row28 col6" >0.292000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row31" class="row_heading level0 row31" >bbh_formal_fallacies.acc_norm</th>
<td id="T_7aef5_row31_col0" class="data row31 col0" >0.476000</td>
<td id="T_7aef5_row31_col1" class="data row31 col1" >0.588000</td>
<td id="T_7aef5_row31_col2" class="data row31 col2" >0.540000</td>
<td id="T_7aef5_row31_col3" class="data row31 col3" >0.464000</td>
<td id="T_7aef5_row31_col4" class="data row31 col4" >0.468000</td>
<td id="T_7aef5_row31_col5" class="data row31 col5" >0.560000</td>
<td id="T_7aef5_row31_col6" class="data row31 col6" >0.484000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row34" class="row_heading level0 row34" >bbh_geometric_shapes.acc_norm</th>
<td id="T_7aef5_row34_col0" class="data row34 col0" >0.156000</td>
<td id="T_7aef5_row34_col1" class="data row34 col1" >0.444000</td>
<td id="T_7aef5_row34_col2" class="data row34 col2" >0.376000</td>
<td id="T_7aef5_row34_col3" class="data row34 col3" >0.220000</td>
<td id="T_7aef5_row34_col4" class="data row34 col4" >0.428000</td>
<td id="T_7aef5_row34_col5" class="data row34 col5" >0.336000</td>
<td id="T_7aef5_row34_col6" class="data row34 col6" >0.368000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row37" class="row_heading level0 row37" >bbh_hyperbaton.acc_norm</th>
<td id="T_7aef5_row37_col0" class="data row37 col0" >0.496000</td>
<td id="T_7aef5_row37_col1" class="data row37 col1" >0.588000</td>
<td id="T_7aef5_row37_col2" class="data row37 col2" >0.504000</td>
<td id="T_7aef5_row37_col3" class="data row37 col3" >0.516000</td>
<td id="T_7aef5_row37_col4" class="data row37 col4" >0.484000</td>
<td id="T_7aef5_row37_col5" class="data row37 col5" >0.632000</td>
<td id="T_7aef5_row37_col6" class="data row37 col6" >0.668000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row40" class="row_heading level0 row40" >bbh_logical_deduction_five_objects.acc_norm</th>
<td id="T_7aef5_row40_col0" class="data row40 col0" >0.228000</td>
<td id="T_7aef5_row40_col1" class="data row40 col1" >0.464000</td>
<td id="T_7aef5_row40_col2" class="data row40 col2" >0.240000</td>
<td id="T_7aef5_row40_col3" class="data row40 col3" >0.208000</td>
<td id="T_7aef5_row40_col4" class="data row40 col4" >0.288000</td>
<td id="T_7aef5_row40_col5" class="data row40 col5" >0.356000</td>
<td id="T_7aef5_row40_col6" class="data row40 col6" >0.436000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row43" class="row_heading level0 row43" >bbh_logical_deduction_seven_objects.acc_norm</th>
<td id="T_7aef5_row43_col0" class="data row43 col0" >0.156000</td>
<td id="T_7aef5_row43_col1" class="data row43 col1" >0.344000</td>
<td id="T_7aef5_row43_col2" class="data row43 col2" >0.176000</td>
<td id="T_7aef5_row43_col3" class="data row43 col3" >0.144000</td>
<td id="T_7aef5_row43_col4" class="data row43 col4" >0.160000</td>
<td id="T_7aef5_row43_col5" class="data row43 col5" >0.328000</td>
<td id="T_7aef5_row43_col6" class="data row43 col6" >0.388000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row46" class="row_heading level0 row46" >bbh_logical_deduction_three_objects.acc_norm</th>
<td id="T_7aef5_row46_col0" class="data row46 col0" >0.408000</td>
<td id="T_7aef5_row46_col1" class="data row46 col1" >0.692000</td>
<td id="T_7aef5_row46_col2" class="data row46 col2" >0.400000</td>
<td id="T_7aef5_row46_col3" class="data row46 col3" >0.340000</td>
<td id="T_7aef5_row46_col4" class="data row46 col4" >0.484000</td>
<td id="T_7aef5_row46_col5" class="data row46 col5" >0.512000</td>
<td id="T_7aef5_row46_col6" class="data row46 col6" >0.504000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row49" class="row_heading level0 row49" >bbh_movie_recommendation.acc_norm</th>
<td id="T_7aef5_row49_col0" class="data row49 col0" >0.496000</td>
<td id="T_7aef5_row49_col1" class="data row49 col1" >0.584000</td>
<td id="T_7aef5_row49_col2" class="data row49 col2" >0.796000</td>
<td id="T_7aef5_row49_col3" class="data row49 col3" >0.264000</td>
<td id="T_7aef5_row49_col4" class="data row49 col4" >0.296000</td>
<td id="T_7aef5_row49_col5" class="data row49 col5" >0.792000</td>
<td id="T_7aef5_row49_col6" class="data row49 col6" >0.548000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row52" class="row_heading level0 row52" >bbh_navigate.acc_norm</th>
<td id="T_7aef5_row52_col0" class="data row52 col0" >0.504000</td>
<td id="T_7aef5_row52_col1" class="data row52 col1" >0.680000</td>
<td id="T_7aef5_row52_col2" class="data row52 col2" >0.464000</td>
<td id="T_7aef5_row52_col3" class="data row52 col3" >0.420000</td>
<td id="T_7aef5_row52_col4" class="data row52 col4" >0.484000</td>
<td id="T_7aef5_row52_col5" class="data row52 col5" >0.508000</td>
<td id="T_7aef5_row52_col6" class="data row52 col6" >0.612000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row55" class="row_heading level0 row55" >bbh_object_counting.acc_norm</th>
<td id="T_7aef5_row55_col0" class="data row55 col0" >0.336000</td>
<td id="T_7aef5_row55_col1" class="data row55 col1" >0.376000</td>
<td id="T_7aef5_row55_col2" class="data row55 col2" >0.316000</td>
<td id="T_7aef5_row55_col3" class="data row55 col3" >0.356000</td>
<td id="T_7aef5_row55_col4" class="data row55 col4" >0.116000</td>
<td id="T_7aef5_row55_col5" class="data row55 col5" >0.496000</td>
<td id="T_7aef5_row55_col6" class="data row55 col6" >0.360000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row58" class="row_heading level0 row58" >bbh_penguins_in_a_table.acc_norm</th>
<td id="T_7aef5_row58_col0" class="data row58 col0" >0.253425</td>
<td id="T_7aef5_row58_col1" class="data row58 col1" >0.424658</td>
<td id="T_7aef5_row58_col2" class="data row58 col2" >0.424658</td>
<td id="T_7aef5_row58_col3" class="data row58 col3" >0.212329</td>
<td id="T_7aef5_row58_col4" class="data row58 col4" >0.321918</td>
<td id="T_7aef5_row58_col5" class="data row58 col5" >0.438356</td>
<td id="T_7aef5_row58_col6" class="data row58 col6" >0.390411</td>
</tr>
<tr>
<th id="T_7aef5_level0_row61" class="row_heading level0 row61" >bbh_reasoning_about_colored_objects.acc_norm</th>
<td id="T_7aef5_row61_col0" class="data row61 col0" >0.272000</td>
<td id="T_7aef5_row61_col1" class="data row61 col1" >0.488000</td>
<td id="T_7aef5_row61_col2" class="data row61 col2" >0.308000</td>
<td id="T_7aef5_row61_col3" class="data row61 col3" >0.192000</td>
<td id="T_7aef5_row61_col4" class="data row61 col4" >0.176000</td>
<td id="T_7aef5_row61_col5" class="data row61 col5" >0.372000</td>
<td id="T_7aef5_row61_col6" class="data row61 col6" >0.452000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row64" class="row_heading level0 row64" >bbh_ruin_names.acc_norm</th>
<td id="T_7aef5_row64_col0" class="data row64 col0" >0.216000</td>
<td id="T_7aef5_row64_col1" class="data row64 col1" >0.424000</td>
<td id="T_7aef5_row64_col2" class="data row64 col2" >0.440000</td>
<td id="T_7aef5_row64_col3" class="data row64 col3" >0.232000</td>
<td id="T_7aef5_row64_col4" class="data row64 col4" >0.364000</td>
<td id="T_7aef5_row64_col5" class="data row64 col5" >0.504000</td>
<td id="T_7aef5_row64_col6" class="data row64 col6" >0.608000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row67" class="row_heading level0 row67" >bbh_salient_translation_error_detection.acc_norm</th>
<td id="T_7aef5_row67_col0" class="data row67 col0" >0.132000</td>
<td id="T_7aef5_row67_col1" class="data row67 col1" >0.264000</td>
<td id="T_7aef5_row67_col2" class="data row67 col2" >0.272000</td>
<td id="T_7aef5_row67_col3" class="data row67 col3" >0.144000</td>
<td id="T_7aef5_row67_col4" class="data row67 col4" >0.220000</td>
<td id="T_7aef5_row67_col5" class="data row67 col5" >0.396000</td>
<td id="T_7aef5_row67_col6" class="data row67 col6" >0.372000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row70" class="row_heading level0 row70" >bbh_snarks.acc_norm</th>
<td id="T_7aef5_row70_col0" class="data row70 col0" >0.500000</td>
<td id="T_7aef5_row70_col1" class="data row70 col1" >0.516854</td>
<td id="T_7aef5_row70_col2" class="data row70 col2" >0.657303</td>
<td id="T_7aef5_row70_col3" class="data row70 col3" >0.539326</td>
<td id="T_7aef5_row70_col4" class="data row70 col4" >0.477528</td>
<td id="T_7aef5_row70_col5" class="data row70 col5" >0.634831</td>
<td id="T_7aef5_row70_col6" class="data row70 col6" >0.606742</td>
</tr>
<tr>
<th id="T_7aef5_level0_row73" class="row_heading level0 row73" >bbh_sports_understanding.acc_norm</th>
<td id="T_7aef5_row73_col0" class="data row73 col0" >0.540000</td>
<td id="T_7aef5_row73_col1" class="data row73 col1" >0.564000</td>
<td id="T_7aef5_row73_col2" class="data row73 col2" >0.740000</td>
<td id="T_7aef5_row73_col3" class="data row73 col3" >0.524000</td>
<td id="T_7aef5_row73_col4" class="data row73 col4" >0.480000</td>
<td id="T_7aef5_row73_col5" class="data row73 col5" >0.752000</td>
<td id="T_7aef5_row73_col6" class="data row73 col6" >0.708000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row76" class="row_heading level0 row76" >bbh_temporal_sequences.acc_norm</th>
<td id="T_7aef5_row76_col0" class="data row76 col0" >0.072000</td>
<td id="T_7aef5_row76_col1" class="data row76 col1" >0.132000</td>
<td id="T_7aef5_row76_col2" class="data row76 col2" >0.256000</td>
<td id="T_7aef5_row76_col3" class="data row76 col3" >0.240000</td>
<td id="T_7aef5_row76_col4" class="data row76 col4" >0.244000</td>
<td id="T_7aef5_row76_col5" class="data row76 col5" >0.096000</td>
<td id="T_7aef5_row76_col6" class="data row76 col6" >0.260000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row79" class="row_heading level0 row79" >bbh_tracking_shuffled_objects_five_objects.acc_norm</th>
<td id="T_7aef5_row79_col0" class="data row79 col0" >0.212000</td>
<td id="T_7aef5_row79_col1" class="data row79 col1" >0.144000</td>
<td id="T_7aef5_row79_col2" class="data row79 col2" >0.160000</td>
<td id="T_7aef5_row79_col3" class="data row79 col3" >0.224000</td>
<td id="T_7aef5_row79_col4" class="data row79 col4" >0.112000</td>
<td id="T_7aef5_row79_col5" class="data row79 col5" >0.160000</td>
<td id="T_7aef5_row79_col6" class="data row79 col6" >0.152000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row82" class="row_heading level0 row82" >bbh_tracking_shuffled_objects_seven_objects.acc_norm</th>
<td id="T_7aef5_row82_col0" class="data row82 col0" >0.168000</td>
<td id="T_7aef5_row82_col1" class="data row82 col1" >0.120000</td>
<td id="T_7aef5_row82_col2" class="data row82 col2" >0.176000</td>
<td id="T_7aef5_row82_col3" class="data row82 col3" >0.152000</td>
<td id="T_7aef5_row82_col4" class="data row82 col4" >0.120000</td>
<td id="T_7aef5_row82_col5" class="data row82 col5" >0.124000</td>
<td id="T_7aef5_row82_col6" class="data row82 col6" >0.144000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row85" class="row_heading level0 row85" >bbh_tracking_shuffled_objects_three_objects.acc_norm</th>
<td id="T_7aef5_row85_col0" class="data row85 col0" >0.340000</td>
<td id="T_7aef5_row85_col1" class="data row85 col1" >0.320000</td>
<td id="T_7aef5_row85_col2" class="data row85 col2" >0.352000</td>
<td id="T_7aef5_row85_col3" class="data row85 col3" >0.332000</td>
<td id="T_7aef5_row85_col4" class="data row85 col4" >0.304000</td>
<td id="T_7aef5_row85_col5" class="data row85 col5" >0.332000</td>
<td id="T_7aef5_row85_col6" class="data row85 col6" >0.324000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row88" class="row_heading level0 row88" >bbh_web_of_lies.acc_norm</th>
<td id="T_7aef5_row88_col0" class="data row88 col0" >0.488000</td>
<td id="T_7aef5_row88_col1" class="data row88 col1" >0.488000</td>
<td id="T_7aef5_row88_col2" class="data row88 col2" >0.496000</td>
<td id="T_7aef5_row88_col3" class="data row88 col3" >0.488000</td>
<td id="T_7aef5_row88_col4" class="data row88 col4" >0.488000</td>
<td id="T_7aef5_row88_col5" class="data row88 col5" >0.488000</td>
<td id="T_7aef5_row88_col6" class="data row88 col6" >0.532000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row90" class="row_heading level0 row90" >gpqa.acc_norm</th>
<td id="T_7aef5_row90_col0" class="data row90 col0" >0.296980</td>
<td id="T_7aef5_row90_col1" class="data row90 col1" >0.337248</td>
<td id="T_7aef5_row90_col2" class="data row90 col2" >0.269295</td>
<td id="T_7aef5_row90_col3" class="data row90 col3" >0.255872</td>
<td id="T_7aef5_row90_col4" class="data row90 col4" >0.279362</td>
<td id="T_7aef5_row90_col5" class="data row90 col5" >0.296141</td>
<td id="T_7aef5_row90_col6" class="data row90 col6" >0.278523</td>
</tr>
<tr>
<th id="T_7aef5_level0_row94" class="row_heading level0 row94" >gpqa_diamond.acc_norm</th>
<td id="T_7aef5_row94_col0" class="data row94 col0" >0.267677</td>
<td id="T_7aef5_row94_col1" class="data row94 col1" >0.318182</td>
<td id="T_7aef5_row94_col2" class="data row94 col2" >0.267677</td>
<td id="T_7aef5_row94_col3" class="data row94 col3" >0.237374</td>
<td id="T_7aef5_row94_col4" class="data row94 col4" >0.232323</td>
<td id="T_7aef5_row94_col5" class="data row94 col5" >0.242424</td>
<td id="T_7aef5_row94_col6" class="data row94 col6" >0.257576</td>
</tr>
<tr>
<th id="T_7aef5_level0_row97" class="row_heading level0 row97" >gpqa_extended.acc_norm</th>
<td id="T_7aef5_row97_col0" class="data row97 col0" >0.313187</td>
<td id="T_7aef5_row97_col1" class="data row97 col1" >0.351648</td>
<td id="T_7aef5_row97_col2" class="data row97 col2" >0.267399</td>
<td id="T_7aef5_row97_col3" class="data row97 col3" >0.239927</td>
<td id="T_7aef5_row97_col4" class="data row97 col4" >0.276557</td>
<td id="T_7aef5_row97_col5" class="data row97 col5" >0.305861</td>
<td id="T_7aef5_row97_col6" class="data row97 col6" >0.293040</td>
</tr>
<tr>
<th id="T_7aef5_level0_row100" class="row_heading level0 row100" >gpqa_main.acc_norm</th>
<td id="T_7aef5_row100_col0" class="data row100 col0" >0.290179</td>
<td id="T_7aef5_row100_col1" class="data row100 col1" >0.328125</td>
<td id="T_7aef5_row100_col2" class="data row100 col2" >0.272321</td>
<td id="T_7aef5_row100_col3" class="data row100 col3" >0.283482</td>
<td id="T_7aef5_row100_col4" class="data row100 col4" >0.303571</td>
<td id="T_7aef5_row100_col5" class="data row100 col5" >0.308036</td>
<td id="T_7aef5_row100_col6" class="data row100 col6" >0.270089</td>
</tr>
<!-- IFEval rows: prompt-level and instruction-level accuracy, strict and loose variants, one data cell per column col0 to col6.
     Cell ids and classes are kept exactly as generated because the stylesheet in this file targets cells by id.
     scope="row" explicitly marks each metric name as the header of its row for assistive technology. -->
<tr>
<th id="T_7aef5_level0_row103" class="row_heading level0 row103" scope="row">ifeval.prompt_level_strict_acc</th>
<td id="T_7aef5_row103_col0" class="data row103 col0">0.188540</td>
<td id="T_7aef5_row103_col1" class="data row103 col1">0.249538</td>
<td id="T_7aef5_row103_col2" class="data row103 col2">0.227357</td>
<td id="T_7aef5_row103_col3" class="data row103 col3">0.275416</td>
<td id="T_7aef5_row103_col4" class="data row103 col4">0.332717</td>
<td id="T_7aef5_row103_col5" class="data row103 col5">0.081331</td>
<td id="T_7aef5_row103_col6" class="data row103 col6">0.696858</td>
</tr>
<tr>
<th id="T_7aef5_level0_row105" class="row_heading level0 row105" scope="row">ifeval.inst_level_strict_acc</th>
<td id="T_7aef5_row105_col0" class="data row105 col0">0.297362</td>
<td id="T_7aef5_row105_col1" class="data row105 col1">0.381295</td>
<td id="T_7aef5_row105_col2" class="data row105 col2">0.351319</td>
<td id="T_7aef5_row105_col3" class="data row105 col3">0.417266</td>
<td id="T_7aef5_row105_col4" class="data row105 col4">0.474820</td>
<td id="T_7aef5_row105_col5" class="data row105 col5">0.172662</td>
<td id="T_7aef5_row105_col6" class="data row105 col6">0.781775</td>
</tr>
<tr>
<th id="T_7aef5_level0_row107" class="row_heading level0 row107" scope="row">ifeval.prompt_level_loose_acc</th>
<td id="T_7aef5_row107_col0" class="data row107 col0">0.214418</td>
<td id="T_7aef5_row107_col1" class="data row107 col1">0.314233</td>
<td id="T_7aef5_row107_col2" class="data row107 col2">0.243993</td>
<td id="T_7aef5_row107_col3" class="data row107 col3">0.280961</td>
<td id="T_7aef5_row107_col4" class="data row107 col4">0.340111</td>
<td id="T_7aef5_row107_col5" class="data row107 col5">0.092421</td>
<td id="T_7aef5_row107_col6" class="data row107 col6">0.757856</td>
</tr>
<tr>
<th id="T_7aef5_level0_row109" class="row_heading level0 row109" scope="row">ifeval.inst_level_loose_acc</th>
<td id="T_7aef5_row109_col0" class="data row109 col0">0.323741</td>
<td id="T_7aef5_row109_col1" class="data row109 col1">0.437650</td>
<td id="T_7aef5_row109_col2" class="data row109 col2">0.368106</td>
<td id="T_7aef5_row109_col3" class="data row109 col3">0.423261</td>
<td id="T_7aef5_row109_col4" class="data row109 col4">0.484412</td>
<td id="T_7aef5_row109_col5" class="data row109 col5">0.179856</td>
<td id="T_7aef5_row109_col6" class="data row109 col6">0.826139</td>
</tr>
<!-- MATH (hard) rows: the math_hard row followed by its per-topic rows, all exact_match values, one data cell per column col0 to col6.
     Cell ids and classes are kept exactly as generated because the stylesheet in this file targets cells by id.
     scope="row" explicitly marks each metric name as the header of its row for assistive technology. -->
<tr>
<th id="T_7aef5_level0_row111" class="row_heading level0 row111" scope="row">math_hard.exact_match</th>
<td id="T_7aef5_row111_col0" class="data row111 col0">0.258308</td>
<td id="T_7aef5_row111_col1" class="data row111 col1">0.388973</td>
<td id="T_7aef5_row111_col2" class="data row111 col2">0.015861</td>
<td id="T_7aef5_row111_col3" class="data row111 col3">0.000000</td>
<td id="T_7aef5_row111_col4" class="data row111 col4">0.000000</td>
<td id="T_7aef5_row111_col5" class="data row111 col5">0.051360</td>
<td id="T_7aef5_row111_col6" class="data row111 col6">0.171450</td>
</tr>
<tr>
<th id="T_7aef5_level0_row115" class="row_heading level0 row115" scope="row">math_algebra_hard.exact_match</th>
<td id="T_7aef5_row115_col0" class="data row115 col0">0.501629</td>
<td id="T_7aef5_row115_col1" class="data row115 col1">0.618893</td>
<td id="T_7aef5_row115_col2" class="data row115 col2">0.026059</td>
<td id="T_7aef5_row115_col3" class="data row115 col3">0.000000</td>
<td id="T_7aef5_row115_col4" class="data row115 col4">0.000000</td>
<td id="T_7aef5_row115_col5" class="data row115 col5">0.100977</td>
<td id="T_7aef5_row115_col6" class="data row115 col6">0.351792</td>
</tr>
<tr>
<th id="T_7aef5_level0_row118" class="row_heading level0 row118" scope="row">math_counting_and_prob_hard.exact_match</th>
<td id="T_7aef5_row118_col0" class="data row118 col0">0.162602</td>
<td id="T_7aef5_row118_col1" class="data row118 col1">0.349593</td>
<td id="T_7aef5_row118_col2" class="data row118 col2">0.024390</td>
<td id="T_7aef5_row118_col3" class="data row118 col3">0.000000</td>
<td id="T_7aef5_row118_col4" class="data row118 col4">0.000000</td>
<td id="T_7aef5_row118_col5" class="data row118 col5">0.016260</td>
<td id="T_7aef5_row118_col6" class="data row118 col6">0.130081</td>
</tr>
<tr>
<th id="T_7aef5_level0_row121" class="row_heading level0 row121" scope="row">math_geometry_hard.exact_match</th>
<td id="T_7aef5_row121_col0" class="data row121 col0">0.121212</td>
<td id="T_7aef5_row121_col1" class="data row121 col1">0.250000</td>
<td id="T_7aef5_row121_col2" class="data row121 col2">0.030303</td>
<td id="T_7aef5_row121_col3" class="data row121 col3">0.000000</td>
<td id="T_7aef5_row121_col4" class="data row121 col4">0.000000</td>
<td id="T_7aef5_row121_col5" class="data row121 col5">0.022727</td>
<td id="T_7aef5_row121_col6" class="data row121 col6">0.083333</td>
</tr>
<tr>
<th id="T_7aef5_level0_row124" class="row_heading level0 row124" scope="row">math_intermediate_algebra_hard.exact_match</th>
<td id="T_7aef5_row124_col0" class="data row124 col0">0.064286</td>
<td id="T_7aef5_row124_col1" class="data row124 col1">0.153571</td>
<td id="T_7aef5_row124_col2" class="data row124 col2">0.007143</td>
<td id="T_7aef5_row124_col3" class="data row124 col3">0.000000</td>
<td id="T_7aef5_row124_col4" class="data row124 col4">0.000000</td>
<td id="T_7aef5_row124_col5" class="data row124 col5">0.007143</td>
<td id="T_7aef5_row124_col6" class="data row124 col6">0.028571</td>
</tr>
<tr>
<th id="T_7aef5_level0_row127" class="row_heading level0 row127" scope="row">math_num_theory_hard.exact_match</th>
<td id="T_7aef5_row127_col0" class="data row127 col0">0.266234</td>
<td id="T_7aef5_row127_col1" class="data row127 col1">0.415584</td>
<td id="T_7aef5_row127_col2" class="data row127 col2">0.012987</td>
<td id="T_7aef5_row127_col3" class="data row127 col3">0.000000</td>
<td id="T_7aef5_row127_col4" class="data row127 col4">0.000000</td>
<td id="T_7aef5_row127_col5" class="data row127 col5">0.045455</td>
<td id="T_7aef5_row127_col6" class="data row127 col6">0.097403</td>
</tr>
<tr>
<th id="T_7aef5_level0_row130" class="row_heading level0 row130" scope="row">math_prealgebra_hard.exact_match</th>
<td id="T_7aef5_row130_col0" class="data row130 col0">0.430052</td>
<td id="T_7aef5_row130_col1" class="data row130 col1">0.647668</td>
<td id="T_7aef5_row130_col2" class="data row130 col2">0.005181</td>
<td id="T_7aef5_row130_col3" class="data row130 col3">0.000000</td>
<td id="T_7aef5_row130_col4" class="data row130 col4">0.000000</td>
<td id="T_7aef5_row130_col5" class="data row130 col5">0.103627</td>
<td id="T_7aef5_row130_col6" class="data row130 col6">0.326425</td>
</tr>
<tr>
<th id="T_7aef5_level0_row133" class="row_heading level0 row133" scope="row">math_precalculus_hard.exact_match</th>
<td id="T_7aef5_row133_col0" class="data row133 col0">0.074074</td>
<td id="T_7aef5_row133_col1" class="data row133 col1">0.125926</td>
<td id="T_7aef5_row133_col2" class="data row133 col2">0.007407</td>
<td id="T_7aef5_row133_col3" class="data row133 col3">0.000000</td>
<td id="T_7aef5_row133_col4" class="data row133 col4">0.000000</td>
<td id="T_7aef5_row133_col5" class="data row133 col5">0.022222</td>
<td id="T_7aef5_row133_col6" class="data row133 col6">0.044444</td>
</tr>
<!-- MMLU-Pro row: acc values, one data cell per column col0 to col6.
     Cell ids and classes are kept exactly as generated because the stylesheet in this file targets cells by id.
     scope="row" explicitly marks the metric name as the header of its row for assistive technology. -->
<tr>
<th id="T_7aef5_level0_row136" class="row_heading level0 row136" scope="row">mmlu_pro.acc</th>
<td id="T_7aef5_row136_col0" class="data row136 col0">0.192487</td>
<td id="T_7aef5_row136_col1" class="data row136 col1">0.297041</td>
<td id="T_7aef5_row136_col2" class="data row136 col2">0.299119</td>
<td id="T_7aef5_row136_col3" class="data row136 col3">0.118684</td>
<td id="T_7aef5_row136_col4" class="data row136 col4">0.232131</td>
<td id="T_7aef5_row136_col5" class="data row136 col5">0.324551</td>
<td id="T_7aef5_row136_col6" class="data row136 col6">0.319481</td>
</tr>
<!-- MuSR rows: the musr row followed by its per-task rows, all acc_norm values, one data cell per column col0 to col6.
     Cell ids and classes are kept exactly as generated because the stylesheet in this file targets cells by id.
     scope="row" explicitly marks each metric name as the header of its row for assistive technology. -->
<tr>
<th id="T_7aef5_level0_row138" class="row_heading level0 row138" scope="row">musr.acc_norm</th>
<td id="T_7aef5_row138_col0" class="data row138 col0">0.334656</td>
<td id="T_7aef5_row138_col1" class="data row138 col1">0.390212</td>
<td id="T_7aef5_row138_col2" class="data row138 col2">0.392857</td>
<td id="T_7aef5_row138_col3" class="data row138 col3">0.362434</td>
<td id="T_7aef5_row138_col4" class="data row138 col4">0.365079</td>
<td id="T_7aef5_row138_col5" class="data row138 col5">0.382275</td>
<td id="T_7aef5_row138_col6" class="data row138 col6">0.351852</td>
</tr>
<tr>
<th id="T_7aef5_level0_row142" class="row_heading level0 row142" scope="row">musr_murder_mysteries.acc_norm</th>
<td id="T_7aef5_row142_col0" class="data row142 col0">0.504000</td>
<td id="T_7aef5_row142_col1" class="data row142 col1">0.520000</td>
<td id="T_7aef5_row142_col2" class="data row142 col2">0.524000</td>
<td id="T_7aef5_row142_col3" class="data row142 col3">0.492000</td>
<td id="T_7aef5_row142_col4" class="data row142 col4">0.492000</td>
<td id="T_7aef5_row142_col5" class="data row142 col5">0.540000</td>
<td id="T_7aef5_row142_col6" class="data row142 col6">0.504000</td>
</tr>
<tr>
<th id="T_7aef5_level0_row145" class="row_heading level0 row145" scope="row">musr_object_placements.acc_norm</th>
<td id="T_7aef5_row145_col0" class="data row145 col0">0.238281</td>
<td id="T_7aef5_row145_col1" class="data row145 col1">0.281250</td>
<td id="T_7aef5_row145_col2" class="data row145 col2">0.289062</td>
<td id="T_7aef5_row145_col3" class="data row145 col3">0.234375</td>
<td id="T_7aef5_row145_col4" class="data row145 col4">0.214844</td>
<td id="T_7aef5_row145_col5" class="data row145 col5">0.351562</td>
<td id="T_7aef5_row145_col6" class="data row145 col6">0.226562</td>
</tr>
<tr>
<th id="T_7aef5_level0_row148" class="row_heading level0 row148" scope="row">musr_team_allocation.acc_norm</th>
<td id="T_7aef5_row148_col0" class="data row148 col0">0.264000</td>
<td id="T_7aef5_row148_col1" class="data row148 col1">0.372000</td>
<td id="T_7aef5_row148_col2" class="data row148 col2">0.368000</td>
<td id="T_7aef5_row148_col3" class="data row148 col3">0.364000</td>
<td id="T_7aef5_row148_col4" class="data row148 col4">0.392000</td>
<td id="T_7aef5_row148_col5" class="data row148 col5">0.256000</td>
<td id="T_7aef5_row148_col6" class="data row148 col6">0.328000</td>
</tr>
</tbody>
</table>