ThomasSimonini HF staff mamta commited on
Commit
7dfd834
•
0 Parent(s):

Duplicate from huggingface-projects/Deep-Reinforcement-Learning-Leaderboard

Browse files
Files changed (6) hide show
  1. .gitattributes +27 -0
  2. .gitignore +1 -0
  3. README.md +13 -0
  4. app.css +37 -0
  5. app.py +238 -0
  6. utils.py +68 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/*
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Deep Reinforcement Learning Leaderboard
3
+ emoji: 🚀
4
+ colorFrom: green
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.0.20
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.css ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
/* Heading of the per-environment info panel. */
.infoPoint h1 {
    font-size: 30px;
    /* `text-decoration` has no `bold` value; boldness is set through font-weight. */
    font-weight: bold;
}

a {
    text-decoration: underline;
    color: #1f3b54;
}

/* Leaderboard table. */
table {
    margin: 25px 0;
    font-size: 0.9em;
    font-family: sans-serif;
    min-width: 400px;
    box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
}

table th,
table td {
    padding: 12px 15px;
}

tr {
    text-align: left;
}

thead tr {
    text-align: left;
}

/* Let wide tables scroll instead of overflowing their container. */
.flex {
    overflow: auto;
}
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from tqdm.auto import tqdm
4
+ from utils import *
5
+ import gradio as gr
6
+
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ from huggingface_hub.repocard import metadata_load
9
+
10
class DeepRL_Leaderboard:
    """Registry of leaderboards keyed by environment id.

    Each entry maps an environment id to a dict with the display ``title``
    and the ``data`` value returned by ``get_data_per_env`` for that id.
    """

    def __init__(self) -> None:
        self.leaderboard = {}

    def add_leaderboard(self, id=None, title=None):
        """Register one environment.

        Both arguments are stripped of surrounding whitespace before use.
        A call that omits either argument does nothing.
        """
        if id is None or title is None:
            return
        env_id = id.strip()
        entry = {'title': title.strip(), 'data': get_data_per_env(env_id)}
        self.leaderboard[env_id] = entry

    def get_data(self):
        """Return the underlying mapping itself (not a copy)."""
        return self.leaderboard

    def get_ids(self):
        """Return the registered environment ids as a list, in insertion order."""
        return [env_id for env_id in self.leaderboard]
25
+
26
# Stylesheet passed to the Gradio Blocks app.
# Read explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open('app.css', 'r', encoding='utf-8') as f:
    BLOCK_CSS = f.read()

# Module-level caches filled while the leaderboards are built:
#   LOADED_MODEL_IDS:      environment id -> list of model ids
#   LOADED_MODEL_METADATA: model id -> model-card metadata ('' when unavailable)
LOADED_MODEL_IDS = {}
LOADED_MODEL_METADATA = {}
32
+
33
def get_data(rl_env):
    """Build the raw (unranked) results table for one environment.

    Records the model ids found for ``rl_env`` in ``LOADED_MODEL_IDS`` and
    each model's metadata in ``LOADED_MODEL_METADATA`` ('' when no metadata
    could be loaded). Models without metadata are left out of the table.

    Returns a DataFrame with the columns ``User``, ``Model``, ``Results``
    (mean reward minus std reward), ``Mean Reward`` and ``Std Reward``.
    """
    global LOADED_MODEL_IDS, LOADED_MODEL_METADATA
    records = []
    env_model_ids = get_model_ids(rl_env)
    LOADED_MODEL_IDS[rl_env] = env_model_ids

    for model_id in tqdm(env_model_ids):
        meta = get_metadata(model_id)
        if meta is None:
            LOADED_MODEL_METADATA[model_id] = ''
            continue
        LOADED_MODEL_METADATA[model_id] = meta

        mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(meta))
        # Missing values count as 0 so the subtraction below stays numeric.
        if pd.isna(mean_reward):
            mean_reward = 0
        if pd.isna(std_reward):
            std_reward = 0

        records.append({
            "User": model_id.split('/')[0],
            "Model": model_id,
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        })
    return pd.DataFrame.from_records(records)
57
+
58
def get_data_per_env(rl_env):
    """Build the ranked leaderboard for one environment.

    Returns a ``(html, dataframe, is_empty)`` tuple: the rendered table (or a
    "please wait" placeholder when there are no rows), the underlying
    DataFrame, and whether that DataFrame is empty.
    """
    dataframe = get_data(rl_env).fillna("")

    if dataframe.empty:
        # The hourglass emoji was mis-decoded (mojibake) in the original literal.
        html = (
            '<div style="color: green">\n'
            '<p> ⌛ Please wait. Results will be out soon... </p>\n'
            '</div>\n'
        )
        return html, dataframe, dataframe.empty

    # Turn the user and model ids into clickable links.
    dataframe["User"] = dataframe["User"].apply(make_clickable_user)
    dataframe["Model"] = dataframe["Model"].apply(make_clickable_model)

    # Best lower-bound result first, then number the rows 1..n.
    dataframe = dataframe.sort_values(by=['Results'], ascending=False)
    ranking = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranking)
    else:
        dataframe['Ranking'] = ranking

    # escape=False keeps the anchor tags built above as real HTML.
    table_html = dataframe.to_html(escape=False, index=False, justify='left')
    return table_html, dataframe, dataframe.empty
79
+
80
+
81
+
82
# Register every environment shown in the app. Each call fetches that
# environment's data immediately (add_leaderboard calls get_data_per_env).
# The emoji in the titles were mis-decoded (mojibake) in the original literals.
rl_leaderboard = DeepRL_Leaderboard()
rl_leaderboard.add_leaderboard('CartPole-v1', 'The Cartpole-v1 Leaderboard')
rl_leaderboard.add_leaderboard('LunarLander-v2', "The Lunar Lander 🌕 Leaderboard")
rl_leaderboard.add_leaderboard('FrozenLake-v1-4x4-no_slippery', 'The FrozenLake-v1-4x4-no_slippery Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-8x8-no_slippery', 'The FrozenLake-v1-8x8-no_slippery Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-4x4', 'The FrozenLake-v1-4x4 Leaderboard')
rl_leaderboard.add_leaderboard('FrozenLake-v1-8x8', 'The FrozenLake-v1-8x8 Leaderboard')
rl_leaderboard.add_leaderboard('Taxi-v3', 'The Taxi-v3🚖 Leaderboard')
rl_leaderboard.add_leaderboard('CarRacing-v0', " The Car Racing 🏎️ Leaderboard")
rl_leaderboard.add_leaderboard('MountainCar-v0', "The Mountain Car ⛰️ 🚗 Leaderboard")
rl_leaderboard.add_leaderboard('BipedalWalker-v3', "The BipedalWalker Leaderboard")
rl_leaderboard.add_leaderboard('SpaceInvadersNoFrameskip-v4', 'The SpaceInvadersNoFrameskip-v4 Leaderboard')
rl_leaderboard.add_leaderboard('Pixelcopter-PLE-v0', 'The Pixelcopter-PLE-v0 🚁 Leaderboard')
rl_leaderboard.add_leaderboard('Pong-PLE-v0', 'The Pong-PLE-v0 🎾 Leaderboard')
rl_leaderboard.add_leaderboard('Walker2DBulletEnv-v0', 'The Walker2DBulletEnv-v0 🤖 Leaderboard')
rl_leaderboard.add_leaderboard('AntBulletEnv-v0', 'The AntBulletEnv-v0 🕸️ Leaderboard')
rl_leaderboard.add_leaderboard('HalfCheetahBulletEnv-v0', 'The HalfCheetahBulletEnv-v0 🤖 Leaderboard')

# Environment ids in registration order, and the id -> {'title', 'data'} mapping.
RL_ENVS = rl_leaderboard.get_ids()
RL_DETAILS = rl_leaderboard.get_data()
101
+
102
+
103
def update_data(rl_env):
    """Re-fetch every model of one environment and return the raw results table.

    All models are fetched again (not only unseen ones), so the returned
    DataFrame overlaps with previously loaded data.

    Side effects: merges the fetched ids into ``LOADED_MODEL_IDS[rl_env]``
    and refreshes ``LOADED_MODEL_METADATA`` ('' when metadata is unavailable).

    Returns a DataFrame with the columns ``User``, ``Model``, ``Results``
    (mean reward minus std reward), ``Mean Reward`` and ``Std Reward``.
    """
    global LOADED_MODEL_IDS, LOADED_MODEL_METADATA
    data = []
    model_ids = list(get_model_ids(rl_env))

    # Merge instead of `+=`: appending the full list on every refresh grew the
    # cache without bound, and `+=` raised KeyError for an environment that
    # had not been loaded yet. dict.fromkeys de-duplicates while keeping order.
    known_ids = LOADED_MODEL_IDS.get(rl_env, [])
    LOADED_MODEL_IDS[rl_env] = list(dict.fromkeys([*known_ids, *model_ids]))

    for model_id in tqdm(model_ids):
        meta = get_metadata(model_id)
        LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
        if meta is None:
            continue
        user_id = model_id.split('/')[0]
        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        # Missing values count as 0 so the subtraction below stays numeric.
        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
        std_reward = std_reward if not pd.isna(std_reward) else 0

        data.append({
            "User": user_id,
            "Model": model_id,
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        })
    return pd.DataFrame.from_records(data)
129
+
130
+
131
def update_data_per_env(rl_env):
    """Refresh one environment's leaderboard and return its new state.

    Merges the freshly fetched rows into the table currently stored in
    ``RL_DETAILS[rl_env]['data']``, re-sorts and re-ranks the result.

    Returns a ``(html, dataframe, is_empty)`` tuple: the rendered table (or a
    "please wait" placeholder when there are no rows), the merged DataFrame,
    and whether that DataFrame is empty.
    """
    global RL_DETAILS

    _, old_dataframe, _ = RL_DETAILS[rl_env]['data']
    new_dataframe = update_data(rl_env).fillna("")

    if not new_dataframe.empty:
        new_dataframe["User"] = new_dataframe["User"].apply(make_clickable_user)
        new_dataframe["Model"] = new_dataframe["Model"].apply(make_clickable_model)

    # Leave empty frames out of the concat; if both are empty keep the empty one.
    frames = [frame for frame in (old_dataframe, new_dataframe) if not frame.empty]
    dataframe = pd.concat(frames, ignore_index=True) if frames else new_dataframe

    if dataframe.empty:
        # The hourglass emoji was mis-decoded (mojibake) in the original literal.
        html = (
            '<div style="color: green">\n'
            '<p> ⌛ Please wait. Results will be out soon... </p>\n'
            '</div>\n'
        )
        return html, dataframe, dataframe.empty

    # update_data re-fetches every model, so without this each refresh added a
    # second copy of every row. Keep the most recent row per model link.
    dataframe = dataframe.drop_duplicates(subset=['Model'], keep='last')

    dataframe = dataframe.sort_values(by=['Results'], ascending=False)
    ranking = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranking)
    else:
        dataframe['Ranking'] = ranking

    # escape=False keeps the anchor tags in User/Model as real HTML.
    table_html = dataframe.to_html(escape=False, index=False, justify='left')
    return table_html, dataframe, dataframe.empty
159
+
160
+
161
def get_info_display(dataframe, env_name, name_leaderboard, is_empty):
    """Render the HTML info panel shown above a leaderboard table.

    Args:
        dataframe: leaderboard table; its ``User`` column is read only when
            ``is_empty`` is false.
        env_name: environment id shown in the summary sentence.
        name_leaderboard: title rendered as the panel heading.
        is_empty: when true, only the heading is rendered.

    Returns:
        An HTML string wrapped in a ``div`` with class ``infoPoint``.
    """
    if is_empty:
        return """
    <div class='infoPoint'>
    <h1> {name_leaderboard} </h1>
    <br>
    </div>
    """.format(name_leaderboard=name_leaderboard)

    # The emoji below were mis-decoded (mojibake) in the original literal.
    return """
    <div class='infoPoint'>
    <h1> {name_leaderboard} </h1>
    <br>
    <p> This is a leaderboard of <b>{len_dataframe}</b> agents, from <b>{num_unique_users}</b> unique users, playing {env_name} 👩‍🚀. </p>
    <br>
    <p> We use <b>lower bound result to sort the models: mean_reward - std_reward.</b> </p>
    <br>
    <p> You can click on the model's name to be redirected to its model card which includes documentation. </p>
    <br>
    <p> You want to try to train your agents? <a href="http://eepurl.com/h1pElX" target="_blank">Sign up to the Hugging Face free Deep Reinforcement Learning Class 🤗 </a>.
    </p>
    <br>
    <p> You want to compare two agents? <a href="https://huggingface.co/spaces/ThomasSimonini/Compare-Reinforcement-Learning-Agents" target="_blank">It's possible using this Spaces demo 👀 </a>.
    </p>
    </div>
    """.format(
        len_dataframe=len(dataframe),
        env_name=env_name,
        name_leaderboard=name_leaderboard,
        num_unique_users=dataframe['User'].nunique(),
    )
189
+
190
def reload_all_data():
    """Refresh every registered leaderboard and return a status message.

    Replaces ``RL_DETAILS[env]['data']`` for each environment in ``RL_ENVS``
    with the result of ``update_data_per_env`` and returns an HTML snippet
    announcing that the update finished.
    """
    global RL_DETAILS, RL_ENVS

    for rl_env in RL_ENVS:
        RL_DETAILS[rl_env]['data'] = update_data_per_env(rl_env)

    # The check-mark emoji was mis-decoded (mojibake) in the original literal.
    html = (
        '<div style="color: green">\n'
        '<p> ✅ Leaderboard updated! </p>\n'
        '</div>\n'
    )
    return html
202
+
203
+
204
def reload_leaderboard(rl_env):
    """Return the current ``(info_html, table_html)`` pair for one environment.

    Reads the cached state in ``RL_DETAILS``; it does not fetch new data.
    """
    global RL_DETAILS

    # The UI creates the state value wrapped in literal double quotes, while
    # RL_DETAILS is keyed by the bare environment id. If the value arrives
    # still quoted, strip the quotes instead of failing with KeyError.
    # NOTE(review): whether the quotes survive presumably depends on the Gradio
    # version - confirm against the pinned sdk_version.
    if isinstance(rl_env, str) and rl_env not in RL_DETAILS:
        rl_env = rl_env.strip('"')

    data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
    markdown = get_info_display(data_dataframe, rl_env, RL_DETAILS[rl_env]['title'], is_empty)

    return markdown, data_html
212
+
213
+
214
+
215
+
216
+
217
+
218
# Build the UI: one notification banner plus one tab per environment.
block = gr.Blocks(css=BLOCK_CSS)
with block:
    # The hourglass emoji was mis-decoded (mojibake) in the original literal.
    notification = gr.HTML("""<div style="color: green">
    <p> ⌛ Updating leaderboard... </p>
    </div>
    """)
    # On page load, refresh all leaderboards; the banner shows the returned status.
    block.load(reload_all_data, [], [notification])

    with gr.Tabs():
        for rl_env in RL_ENVS:
            with gr.TabItem(rl_env) as rl_tab:
                data_html, data_dataframe, is_empty = RL_DETAILS[rl_env]['data']
                markdown = get_info_display(data_dataframe, rl_env, RL_DETAILS[rl_env]['title'], is_empty)
                # Per-tab state holding the environment id (wrapped in double quotes).
                env_state = gr.Variable(value=f'\"{rl_env}\"')
                output_markdown = gr.HTML(markdown)

                output_html = gr.HTML(data_html)

                # Selecting the tab re-renders it from the cached RL_DETAILS entry.
                rl_tab.select(reload_leaderboard, inputs=[env_state], outputs=[output_markdown, output_html])

block.launch()
utils.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+ from tqdm.auto import tqdm
4
+ from huggingface_hub import HfApi, hf_hub_download
5
+ from huggingface_hub.repocard import metadata_load
6
+
7
+
8
+ # Based on Omar Sanseviero work
9
+ # Make model clickable link
10
def make_clickable_model(model_name):
    """Return an HTML anchor that opens the model's Hub page in a new tab.

    The link text is the model id without its leading ``user/`` part (any
    further ``/`` becomes a space). An id with no ``/`` at all is shown in
    full; previously such ids produced an empty, unclickable label.
    """
    # Drop the user/organisation prefix from the displayed name.
    _, _, without_user = model_name.partition('/')
    model_name_show = without_user.replace('/', ' ') or model_name

    link = "https://huggingface.co/" + model_name
    return f'<a target="_blank" href="{link}">{model_name_show}</a>'
16
+
17
+ # Make user clickable link
18
def make_clickable_user(user_id):
    """Return an HTML anchor that opens the user's Hub page in a new tab."""
    profile_url = f"https://huggingface.co/{user_id}"
    return '<a target="_blank" href="' + profile_url + '">' + user_id + '</a>'
21
+
22
+
23
+
24
def get_model_ids(rl_env):
    """Return the ids of the models that ``HfApi.list_models`` yields for the ``rl_env`` filter."""
    hub = HfApi()
    return [model.modelId for model in hub.list_models(filter=rl_env)]
29
+
30
def get_metadata(model_id):
    """Download a model's README.md and return its parsed metadata.

    Returns None when the download fails with an HTTP error (for example
    when the repository has no README.md).
    """
    try:
        card_path = hf_hub_download(model_id, filename="README.md")
        metadata = metadata_load(card_path)
    except requests.exceptions.HTTPError:
        # 404: README.md not found
        return None
    return metadata
37
+
38
def parse_metrics_accuracy(meta):
    """Return the first metric value recorded in a model card's ``model-index``.

    Looks up ``meta["model-index"][0]["results"][0]["metrics"][0]["value"]``.

    Returns None when ``meta`` is empty, has no ``model-index``, or the index
    is empty or malformed, so that one bad model card cannot abort the
    processing of every other model.
    """
    if not meta or "model-index" not in meta:
        return None
    try:
        result = meta["model-index"][0]["results"]
        metrics = result[0]["metrics"]
        return metrics[0]["value"]
    except (KeyError, IndexError, TypeError):
        # Empty lists, missing keys, or unexpected value types.
        return None
45
+
46
+ # We keep the worst case episode
47
def parse_rewards(accuracy):
    """Split a metric value into ``(mean_reward, std_reward)`` floats.

    Accepted forms (the value is converted with ``str`` first):
        ``"<mean> +/- <std>"`` -> ``(mean, std)``
        ``"<mean>"``           -> ``(mean, 0.0)``

    ``None`` and values that cannot be parsed as numbers yield the defaults
    ``(-1000.0, -1000.0)``; previously a non-numeric value raised ValueError.
    """
    # NOTE(review): both defaults are -1000, so a caller computing
    # mean - std gets 0 for a model without a usable metric - confirm that
    # this ranking is intended.
    default_reward = -1000.0
    default_std = -1000.0

    if accuracy is None:
        return default_reward, default_std

    parsed = str(accuracy).split(' +/- ')
    try:
        mean_reward = float(parsed[0])
        # Only a mean was reported: treat the spread as zero.
        std_reward = float(parsed[1]) if len(parsed) > 1 else 0.0
    except ValueError:
        return default_reward, default_std
    return mean_reward, std_reward
68
+