import requests
import pandas as pd
from tqdm.auto import tqdm
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load

# Environments tracked by the leaderboard (one Gradio tab per env).
RL_ENVS = ['LunarLander-v2', 'CarRacing-v0', 'MountainCar-v0',
           'BipedalWalker-v3', 'FrozenLake-v1', 'FrozenLake-v1-no_slippery',
           'Taxi-v3', 'Cliffwalker-v0']

with open('app.css', 'r') as f:
    BLOCK_CSS = f.read()

# Model ids already fetched per environment, so update_data() only
# downloads cards for models published since the last refresh.
LOADED_MODEL_IDS = {rl_env: [] for rl_env in RL_ENVS}

# Fallback scores for models whose card has no parseable reward metric;
# -1000 pins them to the bottom of the leaderboard.
DEFAULT_MEAN_REWARD = -1000
DEFAULT_STD_REWARD = -1000

# Placeholder shown while an environment has no scored models yet.
WAIT_HTML = """
⌛ Please wait. Results will be out soon...
"""


# Based on Omar Sanseviero's work.
def make_clickable_model(model_name):
    """Render a model id as an HTML link to its Hugging Face model card.

    BUG FIX: `link` was computed but never used, so the function returned
    plain text. Tables are rendered with to_html(escape=False), so an
    anchor tag is clearly the intent.
    """
    # Show the model name without its user/org prefix.
    model_name_show = ' '.join(model_name.split('/')[1:])
    link = "https://huggingface.co/" + model_name
    return f'<a target="_blank" href="{link}">{model_name_show}</a>'


def make_clickable_user(user_id):
    """Render a user id as an HTML link to the user's Hugging Face profile.

    BUG FIX: as in make_clickable_model, `link` was previously unused.
    """
    link = "https://huggingface.co/" + user_id
    return f'<a target="_blank" href="{link}">{user_id}</a>'


def get_model_ids(rl_env):
    """Return the ids of all Hub models tagged with the given environment."""
    api = HfApi()
    models = api.list_models(filter=rl_env)
    return [x.modelId for x in models]


def get_metadata(model_id):
    """Load a model card's YAML metadata, or None when it has no README."""
    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
        return metadata_load(readme_path)
    except requests.exceptions.HTTPError:
        # 404: README.md not found on the Hub.
        return None


def parse_metrics_accuracy(meta):
    """Extract the first reported metric value from model-index metadata.

    Returns None when the card carries no model-index section. Assumes the
    model-index follows the Hub schema (results[0]["metrics"][0]["value"]).
    """
    if "model-index" not in meta:
        return None
    result = meta["model-index"][0]["results"]
    metrics = result[0]["metrics"]
    return metrics[0]["value"]


def parse_rewards(accuracy):
    """Parse a 'mean +/- std' metric string into (mean_reward, std_reward).

    Falls back to (DEFAULT_MEAN_REWARD, DEFAULT_STD_REWARD) when the value
    is missing or not in the expected format.

    BUG FIX: the defaults were cross-assigned (mean took default_std and
    vice versa); both were -1000 so behavior is unchanged, but the intent
    is now stated correctly.
    """
    if accuracy is not None:
        parsed = accuracy.split(' +/- ')
        if len(parsed) > 1:
            return float(parsed[0]), float(parsed[1])
    return DEFAULT_MEAN_REWARD, DEFAULT_STD_REWARD


def _build_rows(model_ids):
    """Build one leaderboard row per model id.

    Shared by get_data() and update_data(), which previously duplicated
    this loop verbatim. Models without a README are skipped.
    """
    data = []
    for model_id in tqdm(model_ids):
        meta = get_metadata(model_id)
        if meta is None:
            continue
        user_id = model_id.split('/')[0]
        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        data.append({
            "User": user_id,
            "Model": model_id,
            # We keep the worst-case episode: sort key is the lower bound.
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        })
    return data


def get_data(rl_env):
    """Fetch every model for rl_env and return the scores as a DataFrame."""
    global LOADED_MODEL_IDS
    model_ids = get_model_ids(rl_env)
    LOADED_MODEL_IDS[rl_env] += model_ids
    return pd.DataFrame.from_records(_build_rows(model_ids))


def update_data(rl_env):
    """Fetch only models not yet loaded for rl_env; return their scores."""
    global LOADED_MODEL_IDS
    model_ids = [x for x in get_model_ids(rl_env)
                 if x not in LOADED_MODEL_IDS[rl_env]]
    LOADED_MODEL_IDS[rl_env] += model_ids
    return pd.DataFrame.from_records(_build_rows(model_ids))


def _rank_and_render(dataframe):
    """Sort by Results (desc), rewrite the Ranking column, render to HTML.

    Returns (table_html, sorted_dataframe). Shared by get_data_per_env()
    and update_data_per_env(), which previously duplicated this logic.
    """
    dataframe = dataframe.sort_values(by=['Results'], ascending=False)
    ranking = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranking)
    else:
        dataframe['Ranking'] = ranking
    table_html = dataframe.to_html(escape=False, index=False, justify='left')
    return table_html, dataframe


def update_data_per_env(rl_env):
    """Merge newly published models into rl_env's cached leaderboard.

    Returns (html, dataframe, is_empty) — same triple stored in
    RL_DETAILS[rl_env]['data'].
    """
    global RL_DETAILS
    _, old_dataframe, _ = RL_DETAILS[rl_env]['data']
    new_dataframe = update_data(rl_env).fillna("")
    if not new_dataframe.empty:
        # Only the new rows need linkifying; old ones already have anchors.
        new_dataframe["User"] = new_dataframe["User"].apply(make_clickable_user)
        new_dataframe["Model"] = new_dataframe["Model"].apply(make_clickable_model)
    dataframe = pd.concat([old_dataframe, new_dataframe])
    if not dataframe.empty:
        table_html, dataframe = _rank_and_render(dataframe)
        return table_html, dataframe, dataframe.empty
    return WAIT_HTML, dataframe, dataframe.empty


def get_data_per_env(rl_env):
    """Build the initial (html, dataframe, is_empty) triple for rl_env."""
    dataframe = get_data(rl_env).fillna("")
    if not dataframe.empty:
        # Turn the user and model ids into clickable links.
        dataframe["User"] = dataframe["User"].apply(make_clickable_user)
        dataframe["Model"] = dataframe["Model"].apply(make_clickable_model)
        table_html, dataframe = _rank_and_render(dataframe)
        return table_html, dataframe, dataframe.empty
    return WAIT_HTML, dataframe, dataframe.empty


def get_info_display(len_dataframe, env_name, name_leaderboard, is_empty):
    """Return the header markdown shown above a leaderboard table.

    An empty leaderboard only shows its title; a populated one also shows
    the agent count and usage notes.
    """
    if not is_empty:
        markdown = """
{name_leaderboard}

This is a leaderboard of {len_dataframe} agents playing {env_name} 👩‍🚀.

We use lower bound result to sort the models: mean_reward - std_reward.

You can click on the model's name to be redirected to its model card which includes documentation.

You want to try your model? Read this Unit 1 of Deep Reinforcement Learning Class.
""".format(len_dataframe=len_dataframe, env_name=env_name,
           name_leaderboard=name_leaderboard)
    else:
        markdown = """
{name_leaderboard}
""".format(name_leaderboard=name_leaderboard)
    return markdown


def reload_all_data():
    """Refresh every environment's leaderboard; return a notification HTML."""
    global RL_DETAILS, RL_ENVS
    for rl_env in RL_ENVS:
        RL_DETAILS[rl_env]['data'] = update_data_per_env(rl_env)
    return """
✅ Leaderboard updated! Click `Reload Leaderboard` to see the current leaderboard.
"""


def reload_leaderboard(rl_env):
    """Return the refreshed (header markdown, table html) for one env tab."""
    global RL_DETAILS
    data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
    markdown = get_info_display(len(data_dataframe), rl_env,
                                RL_DETAILS[rl_env]['title'], is_empty)
    return markdown, data_html


# Per-environment tab title and cached (html, dataframe, is_empty) data.
# NOTE: building this eagerly downloads every model card at startup.
RL_DETAILS = {
    'CarRacing-v0': {'title': " The Car Racing 🏎️ Leaderboard 🚀",
                     'data': get_data_per_env('CarRacing-v0')},
    'MountainCar-v0': {'title': "The Mountain Car ⛰️ 🚗 Leaderboard 🚀",
                       'data': get_data_per_env('MountainCar-v0')},
    'LunarLander-v2': {'title': "The Lunar Lander 🌕 Leaderboard 🚀",
                       'data': get_data_per_env('LunarLander-v2')},
    'BipedalWalker-v3': {'title': "The BipedalWalker Leaderboard 🚀",
                         'data': get_data_per_env('BipedalWalker-v3')},
    'FrozenLake-v1': {'title': "The FrozenLake Leaderboard 🚀",
                      'data': get_data_per_env('FrozenLake-v1')},
    'FrozenLake-v1-no_slippery': {'title': 'The FrozenLake-v1-no_slippery Leaderboard 🚀',
                                  'data': get_data_per_env('FrozenLake-v1-no_slippery')},
    'Taxi-v3': {'title': 'The Taxi-v3🚖 Leaderboard 🚀',
                'data': get_data_per_env('Taxi-v3')},
    'Cliffwalker-v0': {'title': 'The Cliffwalker-v0 Leaderboard 🚀',
                       'data': get_data_per_env('Cliffwalker-v0')},
}

block = gr.Blocks(css=BLOCK_CSS)
with block:
    notification = gr.HTML("""
⌛ Updating leaderboard...
""")
    # Kick off a full refresh as soon as the page loads.
    block.load(reload_all_data, [], [notification])
    with gr.Tabs():
        for rl_env in RL_ENVS:
            with gr.TabItem(rl_env) as rl_tab:
                data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
                markdown = get_info_display(len(data_dataframe), rl_env,
                                            RL_DETAILS[rl_env]['title'], is_empty)
                # Bind this tab's env name so the callbacks know which
                # leaderboard to reload.
                env_state = gr.Variable(default_value=rl_env)
                output_markdown = gr.HTML(markdown)
                reload = gr.Button('Reload Leaderboard')
                output_html = gr.HTML(data_html)
                reload.click(reload_leaderboard, inputs=[env_state],
                             outputs=[output_markdown, output_html])
                rl_tab.select(reload_leaderboard, inputs=[env_state],
                              outputs=[output_markdown, output_html])

block.launch()