from collections import defaultdict

from src.leaderboards.saved import leaderboard_to_tags
from src.static.env import API


def group_all_tags(input_tags: list[str]) -> dict:
    """Groups the tags by categories, following the division in the README.

    The bare tag "arena" is recorded as the value "humans" under the "judge"
    category. Every other tag must look like "category:value" with exactly one
    colon; tags with no colon or with several colons are silently ignored.

    Args:
        input_tags (list[str]): list of tags

    Returns:
        dict: category to tag list (a ``defaultdict(list)``)
    """
    output_tags = defaultdict(list)
    for tag in input_tags:
        if tag == "arena":
            output_tags["judge"].append("humans")
            continue

        try:
            category, value = tag.split(":")
        except ValueError:
            # Unpacking fails unless there is exactly one ":" in the tag
            continue
        output_tags[category].append(value)

    return output_tags


def get_leaderboard_info() -> tuple[dict, dict]:
    """Looks up all spaces tagged as leaderboards or arenas on the hub, and homogenizes their tags.

    Spaces returned by ``API.list_spaces`` (filtered on "leaderboard", then on
    "arena") are combined with the manually saved ``leaderboard_to_tags``
    entries. When a name appears several times, only its first occurrence
    (hub leaderboards, then hub arenas, then saved entries) is processed, and
    any saved tags for that name are merged into its tag list.

    Returns:
        tuple[dict, dict]:
            - leaderboard name to its sorted, de-duplicated list of
              "category:tag" strings
            - category to tag to sorted, de-duplicated list of leaderboard
              names, plus an "all" key that maps directly to the sorted list
              of every leaderboard name seen
    """
    leaderboards = [(s.id, s.tags) for s in API.list_spaces(filter=["leaderboard"])]
    arenas = [(s.id, s.tags) for s in API.list_spaces(filter=["arena"])]
    saved_leaderboards = list(leaderboard_to_tags.items())

    # A set gives O(1) duplicate detection; it is sorted into a list at the end
    seen_leaderboards = set()
    leaderboard_to_info = defaultdict(list)
    info_to_leaderboard = defaultdict(lambda: defaultdict(list))

    for name, tags in leaderboards + arenas + saved_leaderboards:
        # If we have a duplicate between the leaderboards from the hub (leaderboards, arena)
        # and the ones we saved manually, we use the version from the hub
        if name in seen_leaderboards:
            continue
        seen_leaderboards.add(name)

        # Work on a copy: for saved entries `tags` IS the list stored in
        # `leaderboard_to_tags`, so extending it in place would grow the
        # shared module-level data on every call.
        all_tags = list(tags)

        # If the model has its own tags, plus the ones we saved, we aggregate them
        if name in leaderboard_to_tags:
            all_tags.extend(leaderboard_to_tags[name])

        grouped_tags = group_all_tags(all_tags)
        for category, values in grouped_tags.items():
            for value in values:
                info_to_leaderboard[category][value].append(name)
                leaderboard_to_info[name].append(f"{category}:{value}")

    # De-duplicate and sort every collected list (only existing keys are
    # reassigned, so mutating while iterating is safe)
    for leaderboard, infos in leaderboard_to_info.items():
        leaderboard_to_info[leaderboard] = sorted(set(infos))

    for category_dict in info_to_leaderboard.values():
        for tag, space_list in category_dict.items():
            category_dict[tag] = sorted(set(space_list))

    # Added last so the loop above never treats this flat list as a category dict
    info_to_leaderboard["all"] = sorted(seen_leaderboards)

    return leaderboard_to_info, info_to_leaderboard