"""Constants for reading Open LLM Leaderboard results and per-sample details datasets."""
# Hub repo id of the aggregated leaderboard results dataset.
RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
# Keys stripped from raw result payloads before display (currently disabled).
# EXCLUDED_KEYS = {
#     "pretty_env_info",
#     "chat_template",
#     "group_subtasks",
# }
# EXCLUDED_RESULTS_KEYS = {
#     "leaderboard",
# }
# EXCLUDED_RESULTS_LEADERBOARDS_KEYS = {
#     "alias",
# }
# Per-model details repo id; format with the sanitized model name
# (e.g. "org__model"). Presumably matches the leaderboard's sanitization — verify against caller.
DETAILS_DATASET_ID = "datasets/open-llm-leaderboard/{model_name_sanitized}-details"
# Glob pattern for the per-subtask sample files inside a details dataset;
# format with a subtask name from SUBTASKS.
DETAILS_FILENAME = "samples_{subtask}_*.json"
# Maps a task key (as it appears in result files) to a pair of
# (display name, subtask-group key used to index SUBTASKS).
TASKS = {
    # "leaderboard_arc_challenge": ("ARC", "leaderboard_arc_challenge"),
    "leaderboard_bbh": ("BBH", "leaderboard_bbh"),
    "leaderboard_gpqa": ("GPQA", "leaderboard_gpqa"),
    "leaderboard_ifeval": ("IFEval", "leaderboard_ifeval"),
    # NOTE: the results key is "leaderboard_math_hard" but the details/subtask
    # prefix is "leaderboard_math" — the second tuple element reflects that.
    "leaderboard_math_hard": ("MATH", "leaderboard_math"),
    "leaderboard_mmlu_pro": ("MMLU-Pro", "leaderboard_mmlu_pro"),
    "leaderboard_musr": ("MuSR", "leaderboard_musr"),
}
# Maps a subtask-group key (second element of a TASKS value) to the list of
# concrete subtask names used to locate sample files via DETAILS_FILENAME.
SUBTASKS = {
    # "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
    "leaderboard_bbh": [
        "leaderboard_bbh_boolean_expressions",
        "leaderboard_bbh_causal_judgement",
        "leaderboard_bbh_date_understanding",
        "leaderboard_bbh_disambiguation_qa",
        "leaderboard_bbh_formal_fallacies",
        "leaderboard_bbh_geometric_shapes",
        "leaderboard_bbh_hyperbaton",
        "leaderboard_bbh_logical_deduction_five_objects",
        "leaderboard_bbh_logical_deduction_seven_objects",
        "leaderboard_bbh_logical_deduction_three_objects",
        "leaderboard_bbh_movie_recommendation",
        "leaderboard_bbh_navigate",
        "leaderboard_bbh_object_counting",
        "leaderboard_bbh_penguins_in_a_table",
        "leaderboard_bbh_reasoning_about_colored_objects",
        "leaderboard_bbh_ruin_names",
        "leaderboard_bbh_salient_translation_error_detection",
        "leaderboard_bbh_snarks",
        "leaderboard_bbh_sports_understanding",
        "leaderboard_bbh_temporal_sequences",
        "leaderboard_bbh_tracking_shuffled_objects_five_objects",
        "leaderboard_bbh_tracking_shuffled_objects_seven_objects",
        "leaderboard_bbh_tracking_shuffled_objects_three_objects",
        "leaderboard_bbh_web_of_lies",
    ],
    "leaderboard_gpqa": [
        "leaderboard_gpqa_extended",
        "leaderboard_gpqa_diamond",
        "leaderboard_gpqa_main",
    ],
    "leaderboard_ifeval": ["leaderboard_ifeval"],
    # Keyed by the "leaderboard_math" prefix from TASKS, not the raw
    # "leaderboard_math_hard" results key.
    # "leaderboard_math_hard": [
    "leaderboard_math": [
        "leaderboard_math_algebra_hard",
        "leaderboard_math_counting_and_prob_hard",
        "leaderboard_math_geometry_hard",
        "leaderboard_math_intermediate_algebra_hard",
        "leaderboard_math_num_theory_hard",
        "leaderboard_math_prealgebra_hard",
        "leaderboard_math_precalculus_hard",
    ],
    "leaderboard_mmlu_pro": ["leaderboard_mmlu_pro"],
    "leaderboard_musr": [
        "leaderboard_musr_murder_mysteries",
        "leaderboard_musr_object_placements",
        "leaderboard_musr_team_allocation",
    ],
}