T145 committed on
Commit
190123d
·
1 Parent(s): 5ef372e

Use official test keys

Browse files
Files changed (1) hide show
  1. functions.py +4 -2
functions.py CHANGED
@@ -21,12 +21,14 @@ The purpose of this PR is to add evaluation results from the Open LLM Leaderboar
21
 
22
  Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
23
 
 
 
24
  KEY_IFEVAL = "IFEval"
25
  KEY_BBH = "BBH"
26
  KEY_MATH = "MATH Lvl 5"
27
  KEY_GPQA = "GPQA"
28
- KEY_MUSR = "MuSR"
29
- KEY_MMLU = "MMLU-Pro"
30
 
31
  def normalize_within_range(value, lower_bound=0, higher_bound=1):
32
  return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100
 
21
 
22
  Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
23
 
24
+ # Keys are named after the backend keys
25
+ # https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/blob/main/backend/README.md#leaderboard
26
  KEY_IFEVAL = "IFEval"
27
  KEY_BBH = "BBH"
28
  KEY_MATH = "MATH Lvl 5"
29
  KEY_GPQA = "GPQA"
30
+ KEY_MUSR = "MUSR"
31
+ KEY_MMLU = "MMLU-PRO"
32
 
33
  def normalize_within_range(value, lower_bound=0, higher_bound=1):
34
  return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100