Spaces:
Running
Running
<style type="text/css"> | |
#T_7aef5 td { | |
overflow-wrap: break-word; | |
max-width: 1px; | |
} | |
#T_7aef5 .col_heading { | |
width: 14.285714285714286%; | |
} | |
#T_7aef5_row15_col0, #T_7aef5_row105_col2, #T_7aef5_row118_col1 { | |
background-color: #f7cbe4; | |
color: #000000; | |
} | |
#T_7aef5_row15_col1, #T_7aef5_row31_col4 { | |
background-color: #f9eff4; | |
color: #000000; | |
} | |
#T_7aef5_row15_col2, #T_7aef5_row34_col4 { | |
background-color: #fbe6f1; | |
color: #000000; | |
} | |
#T_7aef5_row15_col3, #T_7aef5_row58_col4, #T_7aef5_row85_col6, #T_7aef5_row109_col0 { | |
background-color: #f4bfdf; | |
color: #000000; | |
} | |
#T_7aef5_row15_col4, #T_7aef5_row28_col3, #T_7aef5_row46_col3, #T_7aef5_row85_col0, #T_7aef5_row107_col4 { | |
background-color: #f6c7e3; | |
color: #000000; | |
} | |
#T_7aef5_row15_col5, #T_7aef5_row15_col6, #T_7aef5_row31_col3, #T_7aef5_row40_col1, #T_7aef5_row52_col2 { | |
background-color: #f9eef4; | |
color: #000000; | |
} | |
#T_7aef5_row19_col0 { | |
background-color: #a9d874; | |
color: #000000; | |
} | |
#T_7aef5_row19_col1 { | |
background-color: #549825; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row19_col2 { | |
background-color: #cdeaa7; | |
color: #000000; | |
} | |
#T_7aef5_row19_col3, #T_7aef5_row70_col0, #T_7aef5_row115_col0 { | |
background-color: #f7f7f6; | |
color: #000000; | |
} | |
#T_7aef5_row19_col4, #T_7aef5_row88_col6 { | |
background-color: #f1f6ea; | |
color: #000000; | |
} | |
#T_7aef5_row19_col5 { | |
background-color: #7fbc41; | |
color: #000000; | |
} | |
#T_7aef5_row19_col6 { | |
background-color: #8fc654; | |
color: #000000; | |
} | |
#T_7aef5_row22_col0, #T_7aef5_row52_col5 { | |
background-color: #f5f7f3; | |
color: #000000; | |
} | |
#T_7aef5_row22_col1, #T_7aef5_row31_col5 { | |
background-color: #edf6df; | |
color: #000000; | |
} | |
#T_7aef5_row22_col2 { | |
background-color: #ebf6db; | |
color: #000000; | |
} | |
#T_7aef5_row22_col3, #T_7aef5_row22_col4, #T_7aef5_row37_col3, #T_7aef5_row70_col1 { | |
background-color: #f4f7f0; | |
color: #000000; | |
} | |
#T_7aef5_row22_col5 { | |
background-color: #eaf5d9; | |
color: #000000; | |
} | |
#T_7aef5_row22_col6, #T_7aef5_row37_col5 { | |
background-color: #d8efb9; | |
color: #000000; | |
} | |
#T_7aef5_row25_col0, #T_7aef5_row100_col3, #T_7aef5_row145_col1 { | |
background-color: #eeabd2; | |
color: #000000; | |
} | |
#T_7aef5_row25_col1, #T_7aef5_row49_col6 { | |
background-color: #eff6e4; | |
color: #000000; | |
} | |
#T_7aef5_row25_col2 { | |
background-color: #fce3f0; | |
color: #000000; | |
} | |
#T_7aef5_row25_col3, #T_7aef5_row40_col3 { | |
background-color: #df7cb1; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row25_col4 { | |
background-color: #fbd9ec; | |
color: #000000; | |
} | |
#T_7aef5_row25_col5, #T_7aef5_row70_col4, #T_7aef5_row73_col4 { | |
background-color: #f8f2f5; | |
color: #000000; | |
} | |
#T_7aef5_row25_col6, #T_7aef5_row61_col6 { | |
background-color: #faecf3; | |
color: #000000; | |
} | |
#T_7aef5_row28_col0, #T_7aef5_row130_col0 { | |
background-color: #fbe7f2; | |
color: #000000; | |
} | |
#T_7aef5_row28_col1, #T_7aef5_row73_col1 { | |
background-color: #ecf6de; | |
color: #000000; | |
} | |
#T_7aef5_row28_col2, #T_7aef5_row64_col6, #T_7aef5_row70_col6 { | |
background-color: #e2f3ca; | |
color: #000000; | |
} | |
#T_7aef5_row28_col4 { | |
background-color: #f2badc; | |
color: #000000; | |
} | |
#T_7aef5_row28_col5, #T_7aef5_row73_col3, #T_7aef5_row142_col2 { | |
background-color: #f3f6ed; | |
color: #000000; | |
} | |
#T_7aef5_row28_col6, #T_7aef5_row100_col0, #T_7aef5_row145_col2 { | |
background-color: #efb0d6; | |
color: #000000; | |
} | |
#T_7aef5_row31_col0, #T_7aef5_row105_col4 { | |
background-color: #f9f1f5; | |
color: #000000; | |
} | |
#T_7aef5_row31_col1, #T_7aef5_row37_col1 { | |
background-color: #e8f5d5; | |
color: #000000; | |
} | |
#T_7aef5_row31_col2, #T_7aef5_row70_col3, #T_7aef5_row73_col0, #T_7aef5_row142_col5 { | |
background-color: #f0f6e7; | |
color: #000000; | |
} | |
#T_7aef5_row31_col6, #T_7aef5_row37_col4, #T_7aef5_row46_col4, #T_7aef5_row52_col4 { | |
background-color: #f8f3f6; | |
color: #000000; | |
} | |
#T_7aef5_row34_col0, #T_7aef5_row43_col0, #T_7aef5_row124_col1 { | |
background-color: #d24c97; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row34_col1 { | |
background-color: #faeaf2; | |
color: #000000; | |
} | |
#T_7aef5_row34_col2, #T_7aef5_row55_col1 { | |
background-color: #fad6ea; | |
color: #000000; | |
} | |
#T_7aef5_row34_col3, #T_7aef5_row67_col4 { | |
background-color: #e283b7; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row34_col5, #T_7aef5_row55_col0, #T_7aef5_row90_col1 { | |
background-color: #f5c6e2; | |
color: #000000; | |
} | |
#T_7aef5_row34_col6, #T_7aef5_row109_col2, #T_7aef5_row148_col2 { | |
background-color: #f9d3e8; | |
color: #000000; | |
} | |
#T_7aef5_row37_col0, #T_7aef5_row49_col0, #T_7aef5_row55_col5, #T_7aef5_row88_col2 { | |
background-color: #f7f6f7; | |
color: #000000; | |
} | |
#T_7aef5_row37_col2, #T_7aef5_row46_col6, #T_7aef5_row52_col0, #T_7aef5_row64_col5, #T_7aef5_row142_col0, #T_7aef5_row142_col6 { | |
background-color: #f6f7f5; | |
color: #000000; | |
} | |
#T_7aef5_row37_col6 { | |
background-color: #c6e79c; | |
color: #000000; | |
} | |
#T_7aef5_row40_col0, #T_7aef5_row103_col2, #T_7aef5_row145_col6 { | |
background-color: #e388ba; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row40_col2, #T_7aef5_row76_col3, #T_7aef5_row97_col3, #T_7aef5_row145_col0 { | |
background-color: #e590bf; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row40_col4 { | |
background-color: #eeadd4; | |
color: #000000; | |
} | |
#T_7aef5_row40_col5, #T_7aef5_row55_col3 { | |
background-color: #f8cee6; | |
color: #000000; | |
} | |
#T_7aef5_row40_col6 { | |
background-color: #fbe8f2; | |
color: #000000; | |
} | |
#T_7aef5_row43_col1 { | |
background-color: #f6c9e3; | |
color: #000000; | |
} | |
#T_7aef5_row43_col2, #T_7aef5_row61_col4, #T_7aef5_row82_col2 { | |
background-color: #d861a2; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row43_col3, #T_7aef5_row67_col3, #T_7aef5_row79_col1, #T_7aef5_row82_col6 { | |
background-color: #cf4191; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row43_col4, #T_7aef5_row79_col2, #T_7aef5_row79_col5 { | |
background-color: #d34f99; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row43_col5, #T_7aef5_row130_col6, #T_7aef5_row136_col5, #T_7aef5_row148_col6 { | |
background-color: #f4c1df; | |
color: #000000; | |
} | |
#T_7aef5_row43_col6, #T_7aef5_row58_col6, #T_7aef5_row111_col1, #T_7aef5_row138_col1 { | |
background-color: #fcdbed; | |
color: #000000; | |
} | |
#T_7aef5_row46_col0 { | |
background-color: #fde2f0; | |
color: #000000; | |
} | |
#T_7aef5_row46_col1 { | |
background-color: #bbe28a; | |
color: #000000; | |
} | |
#T_7aef5_row46_col2 { | |
background-color: #fde0ef; | |
color: #000000; | |
} | |
#T_7aef5_row46_col5 { | |
background-color: #f5f7f2; | |
color: #000000; | |
} | |
#T_7aef5_row49_col1 { | |
background-color: #e9f5d6; | |
color: #000000; | |
} | |
#T_7aef5_row49_col2 { | |
background-color: #81bd44; | |
color: #000000; | |
} | |
#T_7aef5_row49_col3, #T_7aef5_row67_col1, #T_7aef5_row148_col0 { | |
background-color: #ea9fca; | |
color: #000000; | |
} | |
#T_7aef5_row49_col4, #T_7aef5_row90_col5, #T_7aef5_row97_col6 { | |
background-color: #f0b2d7; | |
color: #000000; | |
} | |
#T_7aef5_row49_col5 { | |
background-color: #83bf46; | |
color: #000000; | |
} | |
#T_7aef5_row52_col1 { | |
background-color: #c0e593; | |
color: #000000; | |
} | |
#T_7aef5_row52_col3, #T_7aef5_row58_col1, #T_7aef5_row58_col2, #T_7aef5_row64_col1, #T_7aef5_row109_col3 { | |
background-color: #fce5f1; | |
color: #000000; | |
} | |
#T_7aef5_row52_col6 { | |
background-color: #e1f3c7; | |
color: #000000; | |
} | |
#T_7aef5_row55_col2, #T_7aef5_row97_col0, #T_7aef5_row107_col1 { | |
background-color: #f3bcdd; | |
color: #000000; | |
} | |
#T_7aef5_row55_col4 { | |
background-color: #c82884; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row55_col6, #T_7aef5_row138_col3 { | |
background-color: #f8d0e7; | |
color: #000000; | |
} | |
#T_7aef5_row58_col0, #T_7aef5_row121_col1 { | |
background-color: #e897c4; | |
color: #000000; | |
} | |
#T_7aef5_row58_col3, #T_7aef5_row79_col0, #T_7aef5_row107_col0 { | |
background-color: #e07eb3; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row58_col5, #T_7aef5_row64_col2, #T_7aef5_row109_col1 { | |
background-color: #fbe9f2; | |
color: #000000; | |
} | |
#T_7aef5_row61_col0, #T_7aef5_row67_col2, #T_7aef5_row100_col2, #T_7aef5_row100_col6 { | |
background-color: #eba3cd; | |
color: #000000; | |
} | |
#T_7aef5_row61_col1, #T_7aef5_row88_col0, #T_7aef5_row88_col1, #T_7aef5_row88_col3, #T_7aef5_row88_col4, #T_7aef5_row88_col5, #T_7aef5_row109_col4 { | |
background-color: #f8f4f6; | |
color: #000000; | |
} | |
#T_7aef5_row61_col2, #T_7aef5_row97_col5, #T_7aef5_row100_col5 { | |
background-color: #f2b8db; | |
color: #000000; | |
} | |
#T_7aef5_row61_col3, #T_7aef5_row136_col0 { | |
background-color: #dc70aa; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row61_col5, #T_7aef5_row67_col6, #T_7aef5_row148_col1 { | |
background-color: #fad4e9; | |
color: #000000; | |
} | |
#T_7aef5_row64_col0, #T_7aef5_row145_col4 { | |
background-color: #e181b5; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row64_col3, #T_7aef5_row94_col4, #T_7aef5_row136_col4 { | |
background-color: #e48bbc; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row64_col4, #T_7aef5_row138_col4, #T_7aef5_row148_col3 { | |
background-color: #f9d1e8; | |
color: #000000; | |
} | |
#T_7aef5_row67_col0, #T_7aef5_row76_col1, #T_7aef5_row118_col6 { | |
background-color: #cc368b; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row67_col5 { | |
background-color: #fddeee; | |
color: #000000; | |
} | |
#T_7aef5_row70_col2 { | |
background-color: #cbe9a4; | |
color: #000000; | |
} | |
#T_7aef5_row70_col5 { | |
background-color: #d6eeb6; | |
color: #000000; | |
} | |
#T_7aef5_row73_col2 { | |
background-color: #a1d26a; | |
color: #000000; | |
} | |
#T_7aef5_row73_col5 { | |
background-color: #9acd61; | |
color: #000000; | |
} | |
#T_7aef5_row73_col6 { | |
background-color: #b2dd7f; | |
color: #000000; | |
} | |
#T_7aef5_row76_col0, #T_7aef5_row133_col0 { | |
background-color: #b51370; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row76_col2, #T_7aef5_row90_col3, #T_7aef5_row94_col6, #T_7aef5_row148_col5 { | |
background-color: #e89ac6; | |
color: #000000; | |
} | |
#T_7aef5_row76_col4, #T_7aef5_row94_col5, #T_7aef5_row107_col2 { | |
background-color: #e692c1; | |
color: #000000; | |
} | |
#T_7aef5_row76_col5, #T_7aef5_row127_col6 { | |
background-color: #c2197a; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row76_col6, #T_7aef5_row111_col0 { | |
background-color: #e99cc8; | |
color: #000000; | |
} | |
#T_7aef5_row79_col3 { | |
background-color: #e286b8; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row79_col4 { | |
background-color: #c72482; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row79_col6, #T_7aef5_row82_col3 { | |
background-color: #d14895; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row82_col0, #T_7aef5_row111_col6 { | |
background-color: #d65a9f; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row82_col1, #T_7aef5_row82_col4, #T_7aef5_row136_col3 { | |
background-color: #c92b86; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row82_col5, #T_7aef5_row121_col0 { | |
background-color: #ca2f88; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row85_col1, #T_7aef5_row94_col1, #T_7aef5_row136_col6 { | |
background-color: #f3bdde; | |
color: #000000; | |
} | |
#T_7aef5_row85_col2, #T_7aef5_row97_col1, #T_7aef5_row115_col6, #T_7aef5_row138_col6, #T_7aef5_row145_col5 { | |
background-color: #f7cce5; | |
color: #000000; | |
} | |
#T_7aef5_row85_col3, #T_7aef5_row85_col5, #T_7aef5_row100_col1 { | |
background-color: #f5c2e0; | |
color: #000000; | |
} | |
#T_7aef5_row85_col4, #T_7aef5_row100_col4 { | |
background-color: #f1b7da; | |
color: #000000; | |
} | |
#T_7aef5_row90_col0, #T_7aef5_row105_col0, #T_7aef5_row136_col1, #T_7aef5_row136_col2 { | |
background-color: #f1b5d9; | |
color: #000000; | |
} | |
#T_7aef5_row90_col2, #T_7aef5_row94_col0, #T_7aef5_row94_col2, #T_7aef5_row97_col2, #T_7aef5_row127_col0 { | |
background-color: #eba1cb; | |
color: #000000; | |
} | |
#T_7aef5_row90_col4, #T_7aef5_row90_col6, #T_7aef5_row107_col3 { | |
background-color: #eda8d1; | |
color: #000000; | |
} | |
#T_7aef5_row94_col3, #T_7aef5_row145_col3 { | |
background-color: #e58dbe; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row97_col4, #T_7aef5_row103_col3 { | |
background-color: #eca6cf; | |
color: #000000; | |
} | |
#T_7aef5_row103_col0 { | |
background-color: #db6ca8; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row103_col1 { | |
background-color: #e795c3; | |
color: #000000; | |
} | |
#T_7aef5_row103_col4, #T_7aef5_row138_col0 { | |
background-color: #f5c4e1; | |
color: #000000; | |
} | |
#T_7aef5_row103_col5 { | |
background-color: #b91574; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row103_col6 { | |
background-color: #b9e187; | |
color: #000000; | |
} | |
#T_7aef5_row105_col1, #T_7aef5_row138_col5 { | |
background-color: #fbd8eb; | |
color: #000000; | |
} | |
#T_7aef5_row105_col3, #T_7aef5_row127_col1 { | |
background-color: #fce4f0; | |
color: #000000; | |
} | |
#T_7aef5_row105_col5 { | |
background-color: #d75ea1; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row105_col6 { | |
background-color: #88c24c; | |
color: #000000; | |
} | |
#T_7aef5_row107_col5 { | |
background-color: #c01879; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row107_col6 { | |
background-color: #95cb5c; | |
color: #000000; | |
} | |
#T_7aef5_row109_col5 { | |
background-color: #d965a4; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row109_col6 { | |
background-color: #71b038; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row111_col2, #T_7aef5_row118_col5 { | |
background-color: #970559; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row111_col3, #T_7aef5_row111_col4, #T_7aef5_row115_col3, #T_7aef5_row115_col4, #T_7aef5_row118_col3, #T_7aef5_row118_col4, #T_7aef5_row121_col3, #T_7aef5_row121_col4, #T_7aef5_row124_col3, #T_7aef5_row124_col4, #T_7aef5_row127_col3, #T_7aef5_row127_col4, #T_7aef5_row130_col3, #T_7aef5_row130_col4, #T_7aef5_row133_col3, #T_7aef5_row133_col4 { | |
background-color: #8e0152; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row111_col5 { | |
background-color: #aa0e68; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row115_col1 { | |
background-color: #ddf1c1; | |
color: #000000; | |
} | |
#T_7aef5_row115_col2, #T_7aef5_row118_col2 { | |
background-color: #9b075c; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row115_col5 { | |
background-color: #c41a7c; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row118_col0 { | |
background-color: #d4539b; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row121_col2, #T_7aef5_row124_col6 { | |
background-color: #9d085e; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row121_col5, #T_7aef5_row133_col5 { | |
background-color: #99065a; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row121_col6 { | |
background-color: #bb1675; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row124_col0 { | |
background-color: #b1116d; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row124_col2, #T_7aef5_row124_col5, #T_7aef5_row130_col2, #T_7aef5_row133_col2 { | |
background-color: #900254; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row127_col2 { | |
background-color: #940457; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row127_col5, #T_7aef5_row133_col6 { | |
background-color: #a60c65; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row130_col1 { | |
background-color: #d0ecad; | |
color: #000000; | |
} | |
#T_7aef5_row130_col5 { | |
background-color: #c51d7e; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row133_col1 { | |
background-color: #cb3289; | |
color: #f1f1f1; | |
} | |
#T_7aef5_row138_col2, #T_7aef5_row148_col4 { | |
background-color: #fcdded; | |
color: #000000; | |
} | |
#T_7aef5_row142_col1 { | |
background-color: #f3f7ef; | |
color: #000000; | |
} | |
#T_7aef5_row142_col3, #T_7aef5_row142_col4 { | |
background-color: #f8f5f6; | |
color: #000000; | |
} | |
</style> | |
<table id="T_7aef5"> | |
<thead> | |
<tr> | |
<th class="blank level0" > </th> | |
<th id="T_7aef5_level0_col0" class="col_heading level0 col0" >Spestly/Atlas-Pro-1.5B-Preview</th> | |
<th id="T_7aef5_level0_col1" class="col_heading level0 col1" >Spestly/Atlas-Pro-7B-Preview</th> | |
<th id="T_7aef5_level0_col2" class="col_heading level0 col2" >01-ai/Yi-6B</th> | |
<th id="T_7aef5_level0_col3" class="col_heading level0 col3" >deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</th> | |
<th id="T_7aef5_level0_col4" class="col_heading level0 col4" >deepseek-ai/DeepSeek-R1-Distill-Qwen-7B</th> | |
<th id="T_7aef5_level0_col5" class="col_heading level0 col5" >meta-llama/Meta-Llama-3.1-8B</th> | |
<th id="T_7aef5_level0_col6" class="col_heading level0 col6" >meta-llama/Llama-3.2-3B-Instruct</th> | |
</tr> | |
</thead> | |
<tbody> | |
<tr> | |
<th id="T_7aef5_level0_row15" class="row_heading level0 row15" >bbh.acc_norm</th> | |
<td id="T_7aef5_row15_col0" class="data row15 col0" >0.348030</td> | |
<td id="T_7aef5_row15_col1" class="data row15 col1" >0.465891</td> | |
<td id="T_7aef5_row15_col2" class="data row15 col2" >0.426662</td> | |
<td id="T_7aef5_row15_col3" class="data row15 col3" >0.321298</td> | |
<td id="T_7aef5_row15_col4" class="data row15 col4" >0.341087</td> | |
<td id="T_7aef5_row15_col5" class="data row15 col5" >0.463808</td> | |
<td id="T_7aef5_row15_col6" class="data row15 col6" >0.458601</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row19" class="row_heading level0 row19" >bbh_boolean_expressions.acc_norm</th> | |
<td id="T_7aef5_row19_col0" class="data row19 col0" >0.724000</td> | |
<td id="T_7aef5_row19_col1" class="data row19 col1" >0.884000</td> | |
<td id="T_7aef5_row19_col2" class="data row19 col2" >0.656000</td> | |
<td id="T_7aef5_row19_col3" class="data row19 col3" >0.500000</td> | |
<td id="T_7aef5_row19_col4" class="data row19 col4" >0.532000</td> | |
<td id="T_7aef5_row19_col5" class="data row19 col5" >0.800000</td> | |
<td id="T_7aef5_row19_col6" class="data row19 col6" >0.772000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row22" class="row_heading level0 row22" >bbh_causal_judgement.acc_norm</th> | |
<td id="T_7aef5_row22_col0" class="data row22 col0" >0.508021</td> | |
<td id="T_7aef5_row22_col1" class="data row22 col1" >0.561497</td> | |
<td id="T_7aef5_row22_col2" class="data row22 col2" >0.572193</td> | |
<td id="T_7aef5_row22_col3" class="data row22 col3" >0.518717</td> | |
<td id="T_7aef5_row22_col4" class="data row22 col4" >0.518717</td> | |
<td id="T_7aef5_row22_col5" class="data row22 col5" >0.577540</td> | |
<td id="T_7aef5_row22_col6" class="data row22 col6" >0.631016</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row25" class="row_heading level0 row25" >bbh_date_understanding.acc_norm</th> | |
<td id="T_7aef5_row25_col0" class="data row25 col0" >0.284000</td> | |
<td id="T_7aef5_row25_col1" class="data row25 col1" >0.548000</td> | |
<td id="T_7aef5_row25_col2" class="data row25 col2" >0.412000</td> | |
<td id="T_7aef5_row25_col3" class="data row25 col3" >0.208000</td> | |
<td id="T_7aef5_row25_col4" class="data row25 col4" >0.384000</td> | |
<td id="T_7aef5_row25_col5" class="data row25 col5" >0.480000</td> | |
<td id="T_7aef5_row25_col6" class="data row25 col6" >0.452000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row28" class="row_heading level0 row28" >bbh_disambiguation_qa.acc_norm</th> | |
<td id="T_7aef5_row28_col0" class="data row28 col0" >0.432000</td> | |
<td id="T_7aef5_row28_col1" class="data row28 col1" >0.564000</td> | |
<td id="T_7aef5_row28_col2" class="data row28 col2" >0.608000</td> | |
<td id="T_7aef5_row28_col3" class="data row28 col3" >0.340000</td> | |
<td id="T_7aef5_row28_col4" class="data row28 col4" >0.312000</td> | |
<td id="T_7aef5_row28_col5" class="data row28 col5" >0.524000</td> | |
<td id="T_7aef5_row28_col6" class="data row28 col6" >0.292000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row31" class="row_heading level0 row31" >bbh_formal_fallacies.acc_norm</th> | |
<td id="T_7aef5_row31_col0" class="data row31 col0" >0.476000</td> | |
<td id="T_7aef5_row31_col1" class="data row31 col1" >0.588000</td> | |
<td id="T_7aef5_row31_col2" class="data row31 col2" >0.540000</td> | |
<td id="T_7aef5_row31_col3" class="data row31 col3" >0.464000</td> | |
<td id="T_7aef5_row31_col4" class="data row31 col4" >0.468000</td> | |
<td id="T_7aef5_row31_col5" class="data row31 col5" >0.560000</td> | |
<td id="T_7aef5_row31_col6" class="data row31 col6" >0.484000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row34" class="row_heading level0 row34" >bbh_geometric_shapes.acc_norm</th> | |
<td id="T_7aef5_row34_col0" class="data row34 col0" >0.156000</td> | |
<td id="T_7aef5_row34_col1" class="data row34 col1" >0.444000</td> | |
<td id="T_7aef5_row34_col2" class="data row34 col2" >0.376000</td> | |
<td id="T_7aef5_row34_col3" class="data row34 col3" >0.220000</td> | |
<td id="T_7aef5_row34_col4" class="data row34 col4" >0.428000</td> | |
<td id="T_7aef5_row34_col5" class="data row34 col5" >0.336000</td> | |
<td id="T_7aef5_row34_col6" class="data row34 col6" >0.368000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row37" class="row_heading level0 row37" >bbh_hyperbaton.acc_norm</th> | |
<td id="T_7aef5_row37_col0" class="data row37 col0" >0.496000</td> | |
<td id="T_7aef5_row37_col1" class="data row37 col1" >0.588000</td> | |
<td id="T_7aef5_row37_col2" class="data row37 col2" >0.504000</td> | |
<td id="T_7aef5_row37_col3" class="data row37 col3" >0.516000</td> | |
<td id="T_7aef5_row37_col4" class="data row37 col4" >0.484000</td> | |
<td id="T_7aef5_row37_col5" class="data row37 col5" >0.632000</td> | |
<td id="T_7aef5_row37_col6" class="data row37 col6" >0.668000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row40" class="row_heading level0 row40" >bbh_logical_deduction_five_objects.acc_norm</th> | |
<td id="T_7aef5_row40_col0" class="data row40 col0" >0.228000</td> | |
<td id="T_7aef5_row40_col1" class="data row40 col1" >0.464000</td> | |
<td id="T_7aef5_row40_col2" class="data row40 col2" >0.240000</td> | |
<td id="T_7aef5_row40_col3" class="data row40 col3" >0.208000</td> | |
<td id="T_7aef5_row40_col4" class="data row40 col4" >0.288000</td> | |
<td id="T_7aef5_row40_col5" class="data row40 col5" >0.356000</td> | |
<td id="T_7aef5_row40_col6" class="data row40 col6" >0.436000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row43" class="row_heading level0 row43" >bbh_logical_deduction_seven_objects.acc_norm</th> | |
<td id="T_7aef5_row43_col0" class="data row43 col0" >0.156000</td> | |
<td id="T_7aef5_row43_col1" class="data row43 col1" >0.344000</td> | |
<td id="T_7aef5_row43_col2" class="data row43 col2" >0.176000</td> | |
<td id="T_7aef5_row43_col3" class="data row43 col3" >0.144000</td> | |
<td id="T_7aef5_row43_col4" class="data row43 col4" >0.160000</td> | |
<td id="T_7aef5_row43_col5" class="data row43 col5" >0.328000</td> | |
<td id="T_7aef5_row43_col6" class="data row43 col6" >0.388000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row46" class="row_heading level0 row46" >bbh_logical_deduction_three_objects.acc_norm</th> | |
<td id="T_7aef5_row46_col0" class="data row46 col0" >0.408000</td> | |
<td id="T_7aef5_row46_col1" class="data row46 col1" >0.692000</td> | |
<td id="T_7aef5_row46_col2" class="data row46 col2" >0.400000</td> | |
<td id="T_7aef5_row46_col3" class="data row46 col3" >0.340000</td> | |
<td id="T_7aef5_row46_col4" class="data row46 col4" >0.484000</td> | |
<td id="T_7aef5_row46_col5" class="data row46 col5" >0.512000</td> | |
<td id="T_7aef5_row46_col6" class="data row46 col6" >0.504000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row49" class="row_heading level0 row49" >bbh_movie_recommendation.acc_norm</th> | |
<td id="T_7aef5_row49_col0" class="data row49 col0" >0.496000</td> | |
<td id="T_7aef5_row49_col1" class="data row49 col1" >0.584000</td> | |
<td id="T_7aef5_row49_col2" class="data row49 col2" >0.796000</td> | |
<td id="T_7aef5_row49_col3" class="data row49 col3" >0.264000</td> | |
<td id="T_7aef5_row49_col4" class="data row49 col4" >0.296000</td> | |
<td id="T_7aef5_row49_col5" class="data row49 col5" >0.792000</td> | |
<td id="T_7aef5_row49_col6" class="data row49 col6" >0.548000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row52" class="row_heading level0 row52" >bbh_navigate.acc_norm</th> | |
<td id="T_7aef5_row52_col0" class="data row52 col0" >0.504000</td> | |
<td id="T_7aef5_row52_col1" class="data row52 col1" >0.680000</td> | |
<td id="T_7aef5_row52_col2" class="data row52 col2" >0.464000</td> | |
<td id="T_7aef5_row52_col3" class="data row52 col3" >0.420000</td> | |
<td id="T_7aef5_row52_col4" class="data row52 col4" >0.484000</td> | |
<td id="T_7aef5_row52_col5" class="data row52 col5" >0.508000</td> | |
<td id="T_7aef5_row52_col6" class="data row52 col6" >0.612000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row55" class="row_heading level0 row55" >bbh_object_counting.acc_norm</th> | |
<td id="T_7aef5_row55_col0" class="data row55 col0" >0.336000</td> | |
<td id="T_7aef5_row55_col1" class="data row55 col1" >0.376000</td> | |
<td id="T_7aef5_row55_col2" class="data row55 col2" >0.316000</td> | |
<td id="T_7aef5_row55_col3" class="data row55 col3" >0.356000</td> | |
<td id="T_7aef5_row55_col4" class="data row55 col4" >0.116000</td> | |
<td id="T_7aef5_row55_col5" class="data row55 col5" >0.496000</td> | |
<td id="T_7aef5_row55_col6" class="data row55 col6" >0.360000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row58" class="row_heading level0 row58" >bbh_penguins_in_a_table.acc_norm</th> | |
<td id="T_7aef5_row58_col0" class="data row58 col0" >0.253425</td> | |
<td id="T_7aef5_row58_col1" class="data row58 col1" >0.424658</td> | |
<td id="T_7aef5_row58_col2" class="data row58 col2" >0.424658</td> | |
<td id="T_7aef5_row58_col3" class="data row58 col3" >0.212329</td> | |
<td id="T_7aef5_row58_col4" class="data row58 col4" >0.321918</td> | |
<td id="T_7aef5_row58_col5" class="data row58 col5" >0.438356</td> | |
<td id="T_7aef5_row58_col6" class="data row58 col6" >0.390411</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row61" class="row_heading level0 row61" >bbh_reasoning_about_colored_objects.acc_norm</th> | |
<td id="T_7aef5_row61_col0" class="data row61 col0" >0.272000</td> | |
<td id="T_7aef5_row61_col1" class="data row61 col1" >0.488000</td> | |
<td id="T_7aef5_row61_col2" class="data row61 col2" >0.308000</td> | |
<td id="T_7aef5_row61_col3" class="data row61 col3" >0.192000</td> | |
<td id="T_7aef5_row61_col4" class="data row61 col4" >0.176000</td> | |
<td id="T_7aef5_row61_col5" class="data row61 col5" >0.372000</td> | |
<td id="T_7aef5_row61_col6" class="data row61 col6" >0.452000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row64" class="row_heading level0 row64" >bbh_ruin_names.acc_norm</th> | |
<td id="T_7aef5_row64_col0" class="data row64 col0" >0.216000</td> | |
<td id="T_7aef5_row64_col1" class="data row64 col1" >0.424000</td> | |
<td id="T_7aef5_row64_col2" class="data row64 col2" >0.440000</td> | |
<td id="T_7aef5_row64_col3" class="data row64 col3" >0.232000</td> | |
<td id="T_7aef5_row64_col4" class="data row64 col4" >0.364000</td> | |
<td id="T_7aef5_row64_col5" class="data row64 col5" >0.504000</td> | |
<td id="T_7aef5_row64_col6" class="data row64 col6" >0.608000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row67" class="row_heading level0 row67" >bbh_salient_translation_error_detection.acc_norm</th> | |
<td id="T_7aef5_row67_col0" class="data row67 col0" >0.132000</td> | |
<td id="T_7aef5_row67_col1" class="data row67 col1" >0.264000</td> | |
<td id="T_7aef5_row67_col2" class="data row67 col2" >0.272000</td> | |
<td id="T_7aef5_row67_col3" class="data row67 col3" >0.144000</td> | |
<td id="T_7aef5_row67_col4" class="data row67 col4" >0.220000</td> | |
<td id="T_7aef5_row67_col5" class="data row67 col5" >0.396000</td> | |
<td id="T_7aef5_row67_col6" class="data row67 col6" >0.372000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row70" class="row_heading level0 row70" >bbh_snarks.acc_norm</th> | |
<td id="T_7aef5_row70_col0" class="data row70 col0" >0.500000</td> | |
<td id="T_7aef5_row70_col1" class="data row70 col1" >0.516854</td> | |
<td id="T_7aef5_row70_col2" class="data row70 col2" >0.657303</td> | |
<td id="T_7aef5_row70_col3" class="data row70 col3" >0.539326</td> | |
<td id="T_7aef5_row70_col4" class="data row70 col4" >0.477528</td> | |
<td id="T_7aef5_row70_col5" class="data row70 col5" >0.634831</td> | |
<td id="T_7aef5_row70_col6" class="data row70 col6" >0.606742</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row73" class="row_heading level0 row73" >bbh_sports_understanding.acc_norm</th> | |
<td id="T_7aef5_row73_col0" class="data row73 col0" >0.540000</td> | |
<td id="T_7aef5_row73_col1" class="data row73 col1" >0.564000</td> | |
<td id="T_7aef5_row73_col2" class="data row73 col2" >0.740000</td> | |
<td id="T_7aef5_row73_col3" class="data row73 col3" >0.524000</td> | |
<td id="T_7aef5_row73_col4" class="data row73 col4" >0.480000</td> | |
<td id="T_7aef5_row73_col5" class="data row73 col5" >0.752000</td> | |
<td id="T_7aef5_row73_col6" class="data row73 col6" >0.708000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row76" class="row_heading level0 row76" >bbh_temporal_sequences.acc_norm</th> | |
<td id="T_7aef5_row76_col0" class="data row76 col0" >0.072000</td> | |
<td id="T_7aef5_row76_col1" class="data row76 col1" >0.132000</td> | |
<td id="T_7aef5_row76_col2" class="data row76 col2" >0.256000</td> | |
<td id="T_7aef5_row76_col3" class="data row76 col3" >0.240000</td> | |
<td id="T_7aef5_row76_col4" class="data row76 col4" >0.244000</td> | |
<td id="T_7aef5_row76_col5" class="data row76 col5" >0.096000</td> | |
<td id="T_7aef5_row76_col6" class="data row76 col6" >0.260000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row79" class="row_heading level0 row79" >bbh_tracking_shuffled_objects_five_objects.acc_norm</th> | |
<td id="T_7aef5_row79_col0" class="data row79 col0" >0.212000</td> | |
<td id="T_7aef5_row79_col1" class="data row79 col1" >0.144000</td> | |
<td id="T_7aef5_row79_col2" class="data row79 col2" >0.160000</td> | |
<td id="T_7aef5_row79_col3" class="data row79 col3" >0.224000</td> | |
<td id="T_7aef5_row79_col4" class="data row79 col4" >0.112000</td> | |
<td id="T_7aef5_row79_col5" class="data row79 col5" >0.160000</td> | |
<td id="T_7aef5_row79_col6" class="data row79 col6" >0.152000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row82" class="row_heading level0 row82" >bbh_tracking_shuffled_objects_seven_objects.acc_norm</th> | |
<td id="T_7aef5_row82_col0" class="data row82 col0" >0.168000</td> | |
<td id="T_7aef5_row82_col1" class="data row82 col1" >0.120000</td> | |
<td id="T_7aef5_row82_col2" class="data row82 col2" >0.176000</td> | |
<td id="T_7aef5_row82_col3" class="data row82 col3" >0.152000</td> | |
<td id="T_7aef5_row82_col4" class="data row82 col4" >0.120000</td> | |
<td id="T_7aef5_row82_col5" class="data row82 col5" >0.124000</td> | |
<td id="T_7aef5_row82_col6" class="data row82 col6" >0.144000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row85" class="row_heading level0 row85" >bbh_tracking_shuffled_objects_three_objects.acc_norm</th> | |
<td id="T_7aef5_row85_col0" class="data row85 col0" >0.340000</td> | |
<td id="T_7aef5_row85_col1" class="data row85 col1" >0.320000</td> | |
<td id="T_7aef5_row85_col2" class="data row85 col2" >0.352000</td> | |
<td id="T_7aef5_row85_col3" class="data row85 col3" >0.332000</td> | |
<td id="T_7aef5_row85_col4" class="data row85 col4" >0.304000</td> | |
<td id="T_7aef5_row85_col5" class="data row85 col5" >0.332000</td> | |
<td id="T_7aef5_row85_col6" class="data row85 col6" >0.324000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row88" class="row_heading level0 row88" >bbh_web_of_lies.acc_norm</th> | |
<td id="T_7aef5_row88_col0" class="data row88 col0" >0.488000</td> | |
<td id="T_7aef5_row88_col1" class="data row88 col1" >0.488000</td> | |
<td id="T_7aef5_row88_col2" class="data row88 col2" >0.496000</td> | |
<td id="T_7aef5_row88_col3" class="data row88 col3" >0.488000</td> | |
<td id="T_7aef5_row88_col4" class="data row88 col4" >0.488000</td> | |
<td id="T_7aef5_row88_col5" class="data row88 col5" >0.488000</td> | |
<td id="T_7aef5_row88_col6" class="data row88 col6" >0.532000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row90" class="row_heading level0 row90" >gpqa.acc_norm</th> | |
<td id="T_7aef5_row90_col0" class="data row90 col0" >0.296980</td> | |
<td id="T_7aef5_row90_col1" class="data row90 col1" >0.337248</td> | |
<td id="T_7aef5_row90_col2" class="data row90 col2" >0.269295</td> | |
<td id="T_7aef5_row90_col3" class="data row90 col3" >0.255872</td> | |
<td id="T_7aef5_row90_col4" class="data row90 col4" >0.279362</td> | |
<td id="T_7aef5_row90_col5" class="data row90 col5" >0.296141</td> | |
<td id="T_7aef5_row90_col6" class="data row90 col6" >0.278523</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row94" class="row_heading level0 row94" >gpqa_diamond.acc_norm</th> | |
<td id="T_7aef5_row94_col0" class="data row94 col0" >0.267677</td> | |
<td id="T_7aef5_row94_col1" class="data row94 col1" >0.318182</td> | |
<td id="T_7aef5_row94_col2" class="data row94 col2" >0.267677</td> | |
<td id="T_7aef5_row94_col3" class="data row94 col3" >0.237374</td> | |
<td id="T_7aef5_row94_col4" class="data row94 col4" >0.232323</td> | |
<td id="T_7aef5_row94_col5" class="data row94 col5" >0.242424</td> | |
<td id="T_7aef5_row94_col6" class="data row94 col6" >0.257576</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row97" class="row_heading level0 row97" >gpqa_extended.acc_norm</th> | |
<td id="T_7aef5_row97_col0" class="data row97 col0" >0.313187</td> | |
<td id="T_7aef5_row97_col1" class="data row97 col1" >0.351648</td> | |
<td id="T_7aef5_row97_col2" class="data row97 col2" >0.267399</td> | |
<td id="T_7aef5_row97_col3" class="data row97 col3" >0.239927</td> | |
<td id="T_7aef5_row97_col4" class="data row97 col4" >0.276557</td> | |
<td id="T_7aef5_row97_col5" class="data row97 col5" >0.305861</td> | |
<td id="T_7aef5_row97_col6" class="data row97 col6" >0.293040</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row100" class="row_heading level0 row100" >gpqa_main.acc_norm</th> | |
<td id="T_7aef5_row100_col0" class="data row100 col0" >0.290179</td> | |
<td id="T_7aef5_row100_col1" class="data row100 col1" >0.328125</td> | |
<td id="T_7aef5_row100_col2" class="data row100 col2" >0.272321</td> | |
<td id="T_7aef5_row100_col3" class="data row100 col3" >0.283482</td> | |
<td id="T_7aef5_row100_col4" class="data row100 col4" >0.303571</td> | |
<td id="T_7aef5_row100_col5" class="data row100 col5" >0.308036</td> | |
<td id="T_7aef5_row100_col6" class="data row100 col6" >0.270089</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row103" class="row_heading level0 row103" >ifeval.prompt_level_strict_acc</th> | |
<td id="T_7aef5_row103_col0" class="data row103 col0" >0.188540</td> | |
<td id="T_7aef5_row103_col1" class="data row103 col1" >0.249538</td> | |
<td id="T_7aef5_row103_col2" class="data row103 col2" >0.227357</td> | |
<td id="T_7aef5_row103_col3" class="data row103 col3" >0.275416</td> | |
<td id="T_7aef5_row103_col4" class="data row103 col4" >0.332717</td> | |
<td id="T_7aef5_row103_col5" class="data row103 col5" >0.081331</td> | |
<td id="T_7aef5_row103_col6" class="data row103 col6" >0.696858</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row105" class="row_heading level0 row105" >ifeval.inst_level_strict_acc</th> | |
<td id="T_7aef5_row105_col0" class="data row105 col0" >0.297362</td> | |
<td id="T_7aef5_row105_col1" class="data row105 col1" >0.381295</td> | |
<td id="T_7aef5_row105_col2" class="data row105 col2" >0.351319</td> | |
<td id="T_7aef5_row105_col3" class="data row105 col3" >0.417266</td> | |
<td id="T_7aef5_row105_col4" class="data row105 col4" >0.474820</td> | |
<td id="T_7aef5_row105_col5" class="data row105 col5" >0.172662</td> | |
<td id="T_7aef5_row105_col6" class="data row105 col6" >0.781775</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row107" class="row_heading level0 row107" >ifeval.prompt_level_loose_acc</th> | |
<td id="T_7aef5_row107_col0" class="data row107 col0" >0.214418</td> | |
<td id="T_7aef5_row107_col1" class="data row107 col1" >0.314233</td> | |
<td id="T_7aef5_row107_col2" class="data row107 col2" >0.243993</td> | |
<td id="T_7aef5_row107_col3" class="data row107 col3" >0.280961</td> | |
<td id="T_7aef5_row107_col4" class="data row107 col4" >0.340111</td> | |
<td id="T_7aef5_row107_col5" class="data row107 col5" >0.092421</td> | |
<td id="T_7aef5_row107_col6" class="data row107 col6" >0.757856</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row109" class="row_heading level0 row109" >ifeval.inst_level_loose_acc</th> | |
<td id="T_7aef5_row109_col0" class="data row109 col0" >0.323741</td> | |
<td id="T_7aef5_row109_col1" class="data row109 col1" >0.437650</td> | |
<td id="T_7aef5_row109_col2" class="data row109 col2" >0.368106</td> | |
<td id="T_7aef5_row109_col3" class="data row109 col3" >0.423261</td> | |
<td id="T_7aef5_row109_col4" class="data row109 col4" >0.484412</td> | |
<td id="T_7aef5_row109_col5" class="data row109 col5" >0.179856</td> | |
<td id="T_7aef5_row109_col6" class="data row109 col6" >0.826139</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row111" class="row_heading level0 row111" >math_hard.exact_match</th> | |
<td id="T_7aef5_row111_col0" class="data row111 col0" >0.258308</td> | |
<td id="T_7aef5_row111_col1" class="data row111 col1" >0.388973</td> | |
<td id="T_7aef5_row111_col2" class="data row111 col2" >0.015861</td> | |
<td id="T_7aef5_row111_col3" class="data row111 col3" >0.000000</td> | |
<td id="T_7aef5_row111_col4" class="data row111 col4" >0.000000</td> | |
<td id="T_7aef5_row111_col5" class="data row111 col5" >0.051360</td> | |
<td id="T_7aef5_row111_col6" class="data row111 col6" >0.171450</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row115" class="row_heading level0 row115" >math_algebra_hard.exact_match</th> | |
<td id="T_7aef5_row115_col0" class="data row115 col0" >0.501629</td> | |
<td id="T_7aef5_row115_col1" class="data row115 col1" >0.618893</td> | |
<td id="T_7aef5_row115_col2" class="data row115 col2" >0.026059</td> | |
<td id="T_7aef5_row115_col3" class="data row115 col3" >0.000000</td> | |
<td id="T_7aef5_row115_col4" class="data row115 col4" >0.000000</td> | |
<td id="T_7aef5_row115_col5" class="data row115 col5" >0.100977</td> | |
<td id="T_7aef5_row115_col6" class="data row115 col6" >0.351792</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row118" class="row_heading level0 row118" >math_counting_and_prob_hard.exact_match</th> | |
<td id="T_7aef5_row118_col0" class="data row118 col0" >0.162602</td> | |
<td id="T_7aef5_row118_col1" class="data row118 col1" >0.349593</td> | |
<td id="T_7aef5_row118_col2" class="data row118 col2" >0.024390</td> | |
<td id="T_7aef5_row118_col3" class="data row118 col3" >0.000000</td> | |
<td id="T_7aef5_row118_col4" class="data row118 col4" >0.000000</td> | |
<td id="T_7aef5_row118_col5" class="data row118 col5" >0.016260</td> | |
<td id="T_7aef5_row118_col6" class="data row118 col6" >0.130081</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row121" class="row_heading level0 row121" >math_geometry_hard.exact_match</th> | |
<td id="T_7aef5_row121_col0" class="data row121 col0" >0.121212</td> | |
<td id="T_7aef5_row121_col1" class="data row121 col1" >0.250000</td> | |
<td id="T_7aef5_row121_col2" class="data row121 col2" >0.030303</td> | |
<td id="T_7aef5_row121_col3" class="data row121 col3" >0.000000</td> | |
<td id="T_7aef5_row121_col4" class="data row121 col4" >0.000000</td> | |
<td id="T_7aef5_row121_col5" class="data row121 col5" >0.022727</td> | |
<td id="T_7aef5_row121_col6" class="data row121 col6" >0.083333</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row124" class="row_heading level0 row124" >math_intermediate_algebra_hard.exact_match</th> | |
<td id="T_7aef5_row124_col0" class="data row124 col0" >0.064286</td> | |
<td id="T_7aef5_row124_col1" class="data row124 col1" >0.153571</td> | |
<td id="T_7aef5_row124_col2" class="data row124 col2" >0.007143</td> | |
<td id="T_7aef5_row124_col3" class="data row124 col3" >0.000000</td> | |
<td id="T_7aef5_row124_col4" class="data row124 col4" >0.000000</td> | |
<td id="T_7aef5_row124_col5" class="data row124 col5" >0.007143</td> | |
<td id="T_7aef5_row124_col6" class="data row124 col6" >0.028571</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row127" class="row_heading level0 row127" >math_num_theory_hard.exact_match</th> | |
<td id="T_7aef5_row127_col0" class="data row127 col0" >0.266234</td> | |
<td id="T_7aef5_row127_col1" class="data row127 col1" >0.415584</td> | |
<td id="T_7aef5_row127_col2" class="data row127 col2" >0.012987</td> | |
<td id="T_7aef5_row127_col3" class="data row127 col3" >0.000000</td> | |
<td id="T_7aef5_row127_col4" class="data row127 col4" >0.000000</td> | |
<td id="T_7aef5_row127_col5" class="data row127 col5" >0.045455</td> | |
<td id="T_7aef5_row127_col6" class="data row127 col6" >0.097403</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row130" class="row_heading level0 row130" >math_prealgebra_hard.exact_match</th> | |
<td id="T_7aef5_row130_col0" class="data row130 col0" >0.430052</td> | |
<td id="T_7aef5_row130_col1" class="data row130 col1" >0.647668</td> | |
<td id="T_7aef5_row130_col2" class="data row130 col2" >0.005181</td> | |
<td id="T_7aef5_row130_col3" class="data row130 col3" >0.000000</td> | |
<td id="T_7aef5_row130_col4" class="data row130 col4" >0.000000</td> | |
<td id="T_7aef5_row130_col5" class="data row130 col5" >0.103627</td> | |
<td id="T_7aef5_row130_col6" class="data row130 col6" >0.326425</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row133" class="row_heading level0 row133" >math_precalculus_hard.exact_match</th> | |
<td id="T_7aef5_row133_col0" class="data row133 col0" >0.074074</td> | |
<td id="T_7aef5_row133_col1" class="data row133 col1" >0.125926</td> | |
<td id="T_7aef5_row133_col2" class="data row133 col2" >0.007407</td> | |
<td id="T_7aef5_row133_col3" class="data row133 col3" >0.000000</td> | |
<td id="T_7aef5_row133_col4" class="data row133 col4" >0.000000</td> | |
<td id="T_7aef5_row133_col5" class="data row133 col5" >0.022222</td> | |
<td id="T_7aef5_row133_col6" class="data row133 col6" >0.044444</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row136" class="row_heading level0 row136" >mmlu_pro.acc</th> | |
<td id="T_7aef5_row136_col0" class="data row136 col0" >0.192487</td> | |
<td id="T_7aef5_row136_col1" class="data row136 col1" >0.297041</td> | |
<td id="T_7aef5_row136_col2" class="data row136 col2" >0.299119</td> | |
<td id="T_7aef5_row136_col3" class="data row136 col3" >0.118684</td> | |
<td id="T_7aef5_row136_col4" class="data row136 col4" >0.232131</td> | |
<td id="T_7aef5_row136_col5" class="data row136 col5" >0.324551</td> | |
<td id="T_7aef5_row136_col6" class="data row136 col6" >0.319481</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row138" class="row_heading level0 row138" >musr.acc_norm</th> | |
<td id="T_7aef5_row138_col0" class="data row138 col0" >0.334656</td> | |
<td id="T_7aef5_row138_col1" class="data row138 col1" >0.390212</td> | |
<td id="T_7aef5_row138_col2" class="data row138 col2" >0.392857</td> | |
<td id="T_7aef5_row138_col3" class="data row138 col3" >0.362434</td> | |
<td id="T_7aef5_row138_col4" class="data row138 col4" >0.365079</td> | |
<td id="T_7aef5_row138_col5" class="data row138 col5" >0.382275</td> | |
<td id="T_7aef5_row138_col6" class="data row138 col6" >0.351852</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row142" class="row_heading level0 row142" >musr_murder_mysteries.acc_norm</th> | |
<td id="T_7aef5_row142_col0" class="data row142 col0" >0.504000</td> | |
<td id="T_7aef5_row142_col1" class="data row142 col1" >0.520000</td> | |
<td id="T_7aef5_row142_col2" class="data row142 col2" >0.524000</td> | |
<td id="T_7aef5_row142_col3" class="data row142 col3" >0.492000</td> | |
<td id="T_7aef5_row142_col4" class="data row142 col4" >0.492000</td> | |
<td id="T_7aef5_row142_col5" class="data row142 col5" >0.540000</td> | |
<td id="T_7aef5_row142_col6" class="data row142 col6" >0.504000</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row145" class="row_heading level0 row145" >musr_object_placements.acc_norm</th> | |
<td id="T_7aef5_row145_col0" class="data row145 col0" >0.238281</td> | |
<td id="T_7aef5_row145_col1" class="data row145 col1" >0.281250</td> | |
<td id="T_7aef5_row145_col2" class="data row145 col2" >0.289062</td> | |
<td id="T_7aef5_row145_col3" class="data row145 col3" >0.234375</td> | |
<td id="T_7aef5_row145_col4" class="data row145 col4" >0.214844</td> | |
<td id="T_7aef5_row145_col5" class="data row145 col5" >0.351562</td> | |
<td id="T_7aef5_row145_col6" class="data row145 col6" >0.226562</td> | |
</tr> | |
<tr> | |
<th id="T_7aef5_level0_row148" class="row_heading level0 row148" >musr_team_allocation.acc_norm</th> | |
<td id="T_7aef5_row148_col0" class="data row148 col0" >0.264000</td> | |
<td id="T_7aef5_row148_col1" class="data row148 col1" >0.372000</td> | |
<td id="T_7aef5_row148_col2" class="data row148 col2" >0.368000</td> | |
<td id="T_7aef5_row148_col3" class="data row148 col3" >0.364000</td> | |
<td id="T_7aef5_row148_col4" class="data row148 col4" >0.392000</td> | |
<td id="T_7aef5_row148_col5" class="data row148 col5" >0.256000</td> | |
<td id="T_7aef5_row148_col6" class="data row148 col6" >0.328000</td> | |
</tr> | |
</tbody> | |
</table> | |