import requests
import pandas as pd
from tqdm.auto import tqdm
from utils import *
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load


class DeepRL_Leaderboard:
    """Registry of leaderboards, keyed by environment id.

    Each entry maps an environment id to a dict with two keys:
    ``'title'`` (the display title) and ``'data'`` (the tuple returned by
    ``get_data_per_env``: rendered HTML, DataFrame, is-empty flag).
    """

    def __init__(self) -> None:
        self.leaderboard = {}

    def add_leaderboard(self, id=None, title=None):
        """Register an environment and immediately load its leaderboard data.

        Both arguments are stripped of surrounding whitespace. The call is a
        no-op when either argument is ``None``.
        """
        if id is None or title is None:
            return
        id = id.strip()
        title = title.strip()
        self.leaderboard[id] = {'title': title, 'data': get_data_per_env(id)}

    def get_data(self):
        """Return the underlying ``{env_id: {'title', 'data'}}`` dict (not a copy)."""
        return self.leaderboard

    def get_ids(self):
        """Return the registered environment ids as a list, in insertion order."""
        return list(self.leaderboard)


# CSS file for the Gradio app
with open('app.css', 'r', encoding='utf-8') as f:
    BLOCK_CSS = f.read()

# Per-environment list of model ids that have already been processed.
LOADED_MODEL_IDS = {}
# Per-model metadata as returned by get_metadata ('' when it returned None).
LOADED_MODEL_METADATA = {}

# Placeholder shown in place of the table while an environment has no results.
_WAIT_HTML = """

⌛ Please wait. Results will be out soon...

"""

# Header shown above a non-empty leaderboard table.
_INFO_TEMPLATE = """

{name_leaderboard}


This is a leaderboard of {len_dataframe} agents, from {num_unique_users} unique users, playing {env_name} 👩‍🚀.


We use lower bound result to sort the models: mean_reward - std_reward.


You can click on the model's name to be redirected to its model card which includes documentation.


You want to try to train your agents? Sign up to the Hugging Face free Deep Reinforcement Learning Class 🤗 .

"""

# Header shown when the leaderboard has no entries yet.
_TITLE_ONLY_TEMPLATE = """

{name_leaderboard}


"""


def _collect_rows(model_ids):
    """Fetch metadata for each model id and build the raw results DataFrame.

    Every model's metadata is cached in ``LOADED_MODEL_METADATA``. Models for
    which ``get_metadata`` returns ``None`` are cached as ``''`` and skipped.
    NaN mean/std rewards are replaced by 0, and ``Results`` is the lower
    bound ``mean_reward - std_reward``.
    """
    rows = []
    for model_id in tqdm(model_ids):
        meta = get_metadata(model_id)
        LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
        if meta is None:
            continue
        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        if pd.isna(mean_reward):
            mean_reward = 0
        if pd.isna(std_reward):
            std_reward = 0
        rows.append({
            "User": model_id.split('/')[0],
            "Model": model_id,
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        })
    return pd.DataFrame.from_records(rows)


def _make_clickable(dataframe):
    """Turn the ``User`` and ``Model`` columns into clickable links, in place."""
    dataframe["User"] = dataframe["User"].apply(make_clickable_user)
    dataframe["Model"] = dataframe["Model"].apply(make_clickable_model)
    return dataframe


def _rank_and_render(dataframe):
    """Sort by ``Results`` (descending), number the rows, and render to HTML.

    Returns ``(html, dataframe, is_empty)``. For an empty frame the HTML is
    the "please wait" placeholder and the frame is returned untouched.
    """
    if dataframe.empty:
        return _WAIT_HTML, dataframe, dataframe.empty

    dataframe = dataframe.sort_values(by=['Results'], ascending=False)
    ranking = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranking)
    else:
        # A frame that was ranked before already has the column; overwrite it.
        dataframe['Ranking'] = ranking
    # escape=False keeps the link markup in the User/Model cells intact.
    table_html = dataframe.to_html(escape=False, index=False, justify='left')
    return table_html, dataframe, dataframe.empty


def get_data(rl_env):
    """Load every model of ``rl_env`` and return the raw results DataFrame.

    Also records the environment's model ids in ``LOADED_MODEL_IDS``.
    """
    # Stored as a list so update_data can extend it later.
    model_ids = list(get_model_ids(rl_env))
    LOADED_MODEL_IDS[rl_env] = model_ids
    return _collect_rows(model_ids)


def get_data_per_env(rl_env):
    """Build the initial leaderboard for ``rl_env``.

    Returns ``(html, dataframe, is_empty)``.
    """
    dataframe = get_data(rl_env).fillna("")
    if not dataframe.empty:
        # turn the model ids into clickable links
        dataframe = _make_clickable(dataframe)
    return _rank_and_render(dataframe)


rl_leaderboard = DeepRL_Leaderboard()
rl_leaderboard.add_leaderboard('CarRacing-v0', " The Car Racing 🏎️ Leaderboard")
rl_leaderboard.add_leaderboard('MountainCar-v0', "The Mountain Car ⛰️ 🚗 Leaderboard")
rl_leaderboard.add_leaderboard('LunarLander-v2', "The Lunar Lander 🌕 Leaderboard")
rl_leaderboard.add_leaderboard('BipedalWalker-v3', "The BipedalWalker Leaderboard")
rl_leaderboard.add_leaderboard('Taxi-v3', 'The Taxi-v3🚖 Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-4x4-no_slippery', 'The FrozenLake-v1-4x4-no_slippery Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-8x8-no_slippery', 'The FrozenLake-v1-8x8-no_slippery Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-4x4', 'The FrozenLake-v1-4x4 Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-8x8', 'The FrozenLake-v1-8x8 Leaderboard')
rl_leaderboard.add_leaderboard('SpaceInvadersNoFrameskip-v4', 'The SpaceInvadersNoFrameskip-v4 Leaderboard')
rl_leaderboard.add_leaderboard('CartPole-v1', 'The Cartpole-v1 Leaderboard')
rl_leaderboard.add_leaderboard('Pong-PLE-v0', 'The Pong-PLE-v0 🎾 Leaderboard')
rl_leaderboard.add_leaderboard('Walker2DBulletEnv-v0', 'The Walker2DBulletEnv-v0 🤖 Leaderboard')
rl_leaderboard.add_leaderboard('AntBulletEnv-v0', 'The AntBulletEnv-v0 🕸️ Leaderboard')
rl_leaderboard.add_leaderboard('HalfCheetahBulletEnv-v0', 'The HalfCheetahBulletEnv-v0 🤖 Leaderboard')

RL_ENVS = rl_leaderboard.get_ids()
RL_DETAILS = rl_leaderboard.get_data()


def update_data(rl_env):
    """Load only the models of ``rl_env`` that have not been processed yet.

    Returns the raw results DataFrame for the new models (possibly empty) and
    appends their ids to ``LOADED_MODEL_IDS[rl_env]``.

    NOTE: a model whose metadata changed after it was first loaded is not
    refreshed. Detecting that would require calling get_metadata() for every
    known model again, which is the cost this function avoids.
    """
    known_ids = LOADED_MODEL_IDS.setdefault(rl_env, [])
    # Set lookup instead of scanning the list once per candidate id.
    seen = set(known_ids)
    new_ids = [model_id for model_id in get_model_ids(rl_env) if model_id not in seen]
    known_ids.extend(new_ids)
    return _collect_rows(new_ids)


def update_data_per_env(rl_env):
    """Merge newly published models into the leaderboard of ``rl_env``.

    Returns ``(html, dataframe, is_empty)`` for the merged leaderboard. The
    caller is responsible for storing it back into ``RL_DETAILS``.
    """
    current = RL_DETAILS[rl_env]['data']
    new_dataframe = update_data(rl_env).fillna("")
    if new_dataframe.empty:
        # Nothing new: the already rendered leaderboard is still valid.
        return current

    _, old_dataframe, _ = current
    new_dataframe = _make_clickable(new_dataframe)
    return _rank_and_render(pd.concat([old_dataframe, new_dataframe]))


def get_info_display(dataframe, env_name, name_leaderboard, is_empty):
    """Return the header text shown above the leaderboard table.

    For a non-empty leaderboard the header includes the number of agents and
    of unique users; otherwise only the leaderboard title is shown.
    """
    if is_empty:
        return _TITLE_ONLY_TEMPLATE.format(name_leaderboard=name_leaderboard)
    return _INFO_TEMPLATE.format(
        len_dataframe=len(dataframe),
        env_name=env_name,
        name_leaderboard=name_leaderboard,
        num_unique_users=len(set(dataframe['User'].values)),
    )


def reload_all_data():
    """Refresh every registered leaderboard and return a notification string."""
    for rl_env in RL_ENVS:
        RL_DETAILS[rl_env]['data'] = update_data_per_env(rl_env)
    html = """

✅ Leaderboard updated!

"""
    return html


def reload_leaderboard(rl_env):
    """Return ``(header, table_html)`` for the given environment's tab."""
    # The tab state is created with literal double quotes around the env id,
    # while RL_DETAILS is keyed by the bare id. Stripping the quotes makes the
    # lookup work whether or not Gradio hands the state value over verbatim.
    rl_env = rl_env.strip('"')
    data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
    markdown = get_info_display(data_dataframe, rl_env, RL_DETAILS[rl_env]['title'], is_empty)
    return markdown, data_html


block = gr.Blocks(css=BLOCK_CSS)
with block:
    notification = gr.HTML("""

⌛ Updating leaderboard...

""")
    # Pull in newly published models each time the page is loaded.
    block.load(reload_all_data, [], [notification])

    with gr.Tabs():
        for rl_env in RL_ENVS:
            with gr.TabItem(rl_env) as rl_tab:
                data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
                markdown = get_info_display(data_dataframe, rl_env, RL_DETAILS[rl_env]['title'], is_empty)
                # NOTE(review): the value is deliberately wrapped in double
                # quotes, presumably for a Gradio version that JSON-decodes
                # state values; confirm against the pinned Gradio version.
                env_state = gr.Variable(value=f'"{rl_env}"')
                output_markdown = gr.HTML(markdown)
                output_html = gr.HTML(data_html)
                # Re-render the tab from RL_DETAILS whenever it is selected.
                rl_tab.select(reload_leaderboard, inputs=[env_state], outputs=[output_markdown, output_html])

block.launch()