Use official test keys
Browse files - functions.py +4 -2
functions.py
CHANGED
@@ -21,12 +21,14 @@ The purpose of this PR is to add evaluation results from the Open LLM Leaderboar
|
|
21 |
|
22 |
Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
|
23 |
|
|
|
|
|
24 |
KEY_IFEVAL = "IFEval"
|
25 |
KEY_BBH = "BBH"
|
26 |
KEY_MATH = "MATH Lvl 5"
|
27 |
KEY_GPQA = "GPQA"
|
28 |
-
KEY_MUSR = "
|
29 |
-
KEY_MMLU = "MMLU-
|
30 |
|
31 |
def normalize_within_range(value, lower_bound=0, higher_bound=1):
|
32 |
return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100
|
|
|
21 |
|
22 |
Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
|
23 |
|
24 |
+
# Keys are named after the backend keys
|
25 |
+
# https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/blob/main/backend/README.md#leaderboard
|
26 |
KEY_IFEVAL = "IFEval"
|
27 |
KEY_BBH = "BBH"
|
28 |
KEY_MATH = "MATH Lvl 5"
|
29 |
KEY_GPQA = "GPQA"
|
30 |
+
KEY_MUSR = "MUSR"
|
31 |
+
KEY_MMLU = "MMLU-PRO"
|
32 |
|
33 |
def normalize_within_range(value, lower_bound=0, higher_bound=1):
|
34 |
return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100
|