chriscanal committed on
Commit
8e47868
·
1 Parent(s): 75297e7

Updated app.py to fix conflict and changed name of tab per Clémentine Fourrier's request

Browse files
Files changed (1) hide show
  1. app.py +113 -65
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import json
2
  import os
 
3
  from datetime import datetime, timezone
4
 
5
  import gradio as gr
6
  import pandas as pd
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
- from huggingface_hub import HfApi
9
 
10
  from src.assets.css_html_js import custom_css, get_window_url_params
11
  from src.assets.text_content import (
@@ -24,6 +25,7 @@ from src.display_models.plot_results import (
24
  HUMAN_BASELINES,
25
  )
26
  from src.display_models.get_model_metadata import DO_NOT_SUBMIT_MODELS, ModelType
 
27
  from src.display_models.utils import (
28
  AutoEvalColumn,
29
  EvalQueueColumn,
@@ -32,7 +34,8 @@ from src.display_models.utils import (
32
  styled_message,
33
  styled_warning,
34
  )
35
- from src.load_from_hub import get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub, load_all_info_from_hub
 
36
  from src.rate_limiting import user_submission_permission
37
 
38
  pd.set_option("display.precision", 1)
@@ -60,6 +63,7 @@ api = HfApi(token=H4_TOKEN)
60
  def restart_space():
61
  api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
62
 
 
63
  # Rate limit variables
64
  RATE_LIMIT_PERIOD = 7
65
  RATE_LIMIT_QUOTA = 5
@@ -87,39 +91,23 @@ BENCHMARK_COLS = [
87
  ]
88
  ]
89
 
90
- ## LOAD INFO FROM HUB
91
- eval_queue, requested_models, eval_results, users_to_submission_dates = load_all_info_from_hub(
92
- QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
93
- )
94
 
95
- if not IS_PUBLIC:
96
- (eval_queue_private, requested_models_private, eval_results_private, _) = load_all_info_from_hub(
97
- PRIVATE_QUEUE_REPO,
98
- PRIVATE_RESULTS_REPO,
99
- EVAL_REQUESTS_PATH_PRIVATE,
100
- EVAL_RESULTS_PATH_PRIVATE,
101
- )
102
- else:
103
- eval_queue_private, eval_results_private = None, None
104
 
105
- original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
106
- models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
107
  plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
108
  to_be_dumped = f"models = {repr(models)}\n"
109
 
110
- # with open("models_backlinks.py", "w") as f:
111
- # f.write(to_be_dumped)
112
-
113
- # print(to_be_dumped)
114
-
115
- leaderboard_df = original_df.copy()
116
  (
117
  finished_eval_queue_df,
118
  running_eval_queue_df,
119
  pending_eval_queue_df,
120
- ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
121
-
122
- print(leaderboard_df["Precision"].unique())
123
 
124
 
125
  ## INTERACTION FUNCTIONS
@@ -135,18 +123,25 @@ def add_new_eval(
135
  precision = precision.split(" ")[0]
136
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
137
 
 
 
 
 
138
  num_models_submitted_in_period = user_submission_permission(model, users_to_submission_dates, RATE_LIMIT_PERIOD)
139
  if num_models_submitted_in_period > RATE_LIMIT_QUOTA:
140
  error_msg = f"Organisation or user `{model.split('/')[0]}`"
141
  error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
142
  error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
143
- error_msg += "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
 
 
144
  return styled_error(error_msg)
145
 
146
- if model_type is None or model_type == "":
147
- return styled_error("Please select a model type.")
 
148
 
149
- # check the model actually exists before adding the eval
150
  if revision == "":
151
  revision = "main"
152
 
@@ -160,7 +155,34 @@ def add_new_eval(
160
  if not model_on_hub:
161
  return styled_error(f'Model "{model}" {error}')
162
 
163
- print("adding new eval")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  eval_entry = {
166
  "model": model,
@@ -172,6 +194,9 @@ def add_new_eval(
172
  "status": "PENDING",
173
  "submitted_time": current_time,
174
  "model_type": model_type,
 
 
 
175
  }
176
 
177
  user_name = ""
@@ -180,14 +205,11 @@ def add_new_eval(
180
  user_name = model.split("/")[0]
181
  model_path = model.split("/")[1]
182
 
 
183
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
184
  os.makedirs(OUT_DIR, exist_ok=True)
185
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
186
 
187
- # Check if the model has been forbidden:
188
- if out_path.split("eval-queue/")[1] in DO_NOT_SUBMIT_MODELS:
189
- return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.")
190
-
191
  # Check for duplicate submission
192
  if f"{model}_{revision}_{precision}" in requested_models:
193
  return styled_warning("This model has been already submitted.")
@@ -195,6 +217,7 @@ def add_new_eval(
195
  with open(out_path, "w") as f:
196
  f.write(json.dumps(eval_entry))
197
 
 
198
  api.upload_file(
199
  path_or_fileobj=out_path,
200
  path_in_repo=out_path.split("eval-queue/")[1],
@@ -203,7 +226,7 @@ def add_new_eval(
203
  commit_message=f"Add {model} to eval queue",
204
  )
205
 
206
- # remove the local file
207
  os.remove(out_path)
208
 
209
  return styled_message(
@@ -223,17 +246,25 @@ def change_tab(query_param: str):
223
 
224
 
225
  # Searching and filtering
226
- def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
 
 
 
 
 
 
 
 
227
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
228
- if query != "":
229
- filtered_df = search_table(filtered_df, query)
230
  df = select_columns(filtered_df, columns)
231
-
232
  return df
233
 
 
234
  def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
235
  return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
236
 
 
237
  def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
238
  always_here_cols = [
239
  AutoEvalColumn.model_type_symbol.name,
@@ -245,16 +276,39 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
245
  ]
246
  return filtered_df
247
 
 
248
  NUMERIC_INTERVALS = {
249
- "Unknown": pd.Interval(-1, 0, closed="right"),
250
- "< 1.5B": pd.Interval(0, 1.5, closed="right"),
251
- "~3B": pd.Interval(1.5, 5, closed="right"),
252
- "~7B": pd.Interval(6, 11, closed="right"),
253
- "~13B": pd.Interval(12, 15, closed="right"),
254
- "~35B": pd.Interval(16, 55, closed="right"),
255
- "60B+": pd.Interval(55, 10000, closed="right"),
 
256
  }
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  def filter_models(
259
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
260
  ) -> pd.DataFrame:
@@ -266,7 +320,7 @@ def filter_models(
266
 
267
  type_emoji = [t[0] for t in type_query]
268
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
269
- filtered_df = filtered_df[df[AutoEvalColumn.precision.name].isin(precision_query)]
270
 
271
  numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
272
  params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
@@ -287,7 +341,7 @@ with demo:
287
  with gr.Column():
288
  with gr.Row():
289
  search_bar = gr.Textbox(
290
- placeholder=" 🔍 Search for your model and press ENTER...",
291
  show_label=False,
292
  elem_id="search-bar",
293
  )
@@ -332,12 +386,14 @@ with demo:
332
  ModelType.FT.to_str(),
333
  ModelType.IFT.to_str(),
334
  ModelType.RL.to_str(),
 
335
  ],
336
  value=[
337
  ModelType.PT.to_str(),
338
  ModelType.FT.to_str(),
339
  ModelType.IFT.to_str(),
340
  ModelType.RL.to_str(),
 
341
  ],
342
  interactive=True,
343
  elem_id="filter-columns-type",
@@ -350,12 +406,13 @@ with demo:
350
  elem_id="filter-columns-precision",
351
  )
352
  filter_columns_size = gr.CheckboxGroup(
353
- label="Model sizes",
354
  choices=list(NUMERIC_INTERVALS.keys()),
355
  value=list(NUMERIC_INTERVALS.keys()),
356
  interactive=True,
357
  elem_id="filter-columns-size",
358
  )
 
359
  leaderboard_table = gr.components.Dataframe(
360
  value=leaderboard_df[
361
  [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
@@ -387,7 +444,6 @@ with demo:
387
  update_table,
388
  [
389
  hidden_leaderboard_table_for_search,
390
- leaderboard_table,
391
  shown_columns,
392
  filter_columns_type,
393
  filter_columns_precision,
@@ -401,7 +457,6 @@ with demo:
401
  update_table,
402
  [
403
  hidden_leaderboard_table_for_search,
404
- leaderboard_table,
405
  shown_columns,
406
  filter_columns_type,
407
  filter_columns_precision,
@@ -416,7 +471,6 @@ with demo:
416
  update_table,
417
  [
418
  hidden_leaderboard_table_for_search,
419
- leaderboard_table,
420
  shown_columns,
421
  filter_columns_type,
422
  filter_columns_precision,
@@ -431,7 +485,6 @@ with demo:
431
  update_table,
432
  [
433
  hidden_leaderboard_table_for_search,
434
- leaderboard_table,
435
  shown_columns,
436
  filter_columns_type,
437
  filter_columns_precision,
@@ -446,7 +499,6 @@ with demo:
446
  update_table,
447
  [
448
  hidden_leaderboard_table_for_search,
449
- leaderboard_table,
450
  shown_columns,
451
  filter_columns_type,
452
  filter_columns_precision,
@@ -461,7 +513,6 @@ with demo:
461
  update_table,
462
  [
463
  hidden_leaderboard_table_for_search,
464
- leaderboard_table,
465
  shown_columns,
466
  filter_columns_type,
467
  filter_columns_precision,
@@ -472,7 +523,8 @@ with demo:
472
  leaderboard_table,
473
  queue=True,
474
  )
475
- with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
 
476
  with gr.Row():
477
  with gr.Column():
478
  chart = create_metric_plot_obj(
@@ -556,13 +608,7 @@ with demo:
556
 
557
  with gr.Column():
558
  precision = gr.Dropdown(
559
- choices=[
560
- "float16",
561
- "bfloat16",
562
- "8bit (LLM.int8)",
563
- "4bit (QLoRA / FP4)",
564
- "GPTQ"
565
- ],
566
  label="Precision",
567
  multiselect=False,
568
  value="float16",
@@ -598,8 +644,10 @@ with demo:
598
  citation_button = gr.Textbox(
599
  value=CITATION_BUTTON_TEXT,
600
  label=CITATION_BUTTON_LABEL,
 
601
  elem_id="citation-button",
602
- ).style(show_copy_button=True)
 
603
 
604
  dummy = gr.Textbox(visible=False)
605
  demo.load(
 
1
  import json
2
  import os
3
+ import re
4
  from datetime import datetime, timezone
5
 
6
  import gradio as gr
7
  import pandas as pd
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
+ from huggingface_hub import HfApi, snapshot_download
10
 
11
  from src.assets.css_html_js import custom_css, get_window_url_params
12
  from src.assets.text_content import (
 
25
  HUMAN_BASELINES,
26
  )
27
  from src.display_models.get_model_metadata import DO_NOT_SUBMIT_MODELS, ModelType
28
+ from src.display_models.modelcard_filter import check_model_card
29
  from src.display_models.utils import (
30
  AutoEvalColumn,
31
  EvalQueueColumn,
 
34
  styled_message,
35
  styled_warning,
36
  )
37
+ from src.manage_collections import update_collections
38
+ from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
39
  from src.rate_limiting import user_submission_permission
40
 
41
  pd.set_option("display.precision", 1)
 
63
  def restart_space():
64
  api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
65
 
66
+
67
  # Rate limit variables
68
  RATE_LIMIT_PERIOD = 7
69
  RATE_LIMIT_QUOTA = 5
 
91
  ]
92
  ]
93
 
94
+ snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None)
95
+ snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None)
96
+ requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)
 
97
 
98
+ original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
99
+ update_collections(original_df.copy())
100
+ leaderboard_df = original_df.copy()
 
 
 
 
 
 
101
 
102
+ models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
 
103
  plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
104
  to_be_dumped = f"models = {repr(models)}\n"
105
 
 
 
 
 
 
 
106
  (
107
  finished_eval_queue_df,
108
  running_eval_queue_df,
109
  pending_eval_queue_df,
110
+ ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 
111
 
112
 
113
  ## INTERACTION FUNCTIONS
 
123
  precision = precision.split(" ")[0]
124
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
125
 
126
+ if model_type is None or model_type == "":
127
+ return styled_error("Please select a model type.")
128
+
129
+ # Is the user rate limited?
130
  num_models_submitted_in_period = user_submission_permission(model, users_to_submission_dates, RATE_LIMIT_PERIOD)
131
  if num_models_submitted_in_period > RATE_LIMIT_QUOTA:
132
  error_msg = f"Organisation or user `{model.split('/')[0]}`"
133
  error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
134
  error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
135
+ error_msg += (
136
+ "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
137
+ )
138
  return styled_error(error_msg)
139
 
140
+ # Did the model authors forbid its submission to the leaderboard?
141
+ if model in DO_NOT_SUBMIT_MODELS or base_model in DO_NOT_SUBMIT_MODELS:
142
+ return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.")
143
 
144
+ # Does the model actually exist?
145
  if revision == "":
146
  revision = "main"
147
 
 
155
  if not model_on_hub:
156
  return styled_error(f'Model "{model}" {error}')
157
 
158
+ model_info = api.model_info(repo_id=model, revision=revision)
159
+
160
+ size_pattern = size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
161
+ try:
162
+ model_size = round(model_info.safetensors["total"] / 1e9, 3)
163
+ except AttributeError:
164
+ try:
165
+ size_match = re.search(size_pattern, model.lower())
166
+ model_size = size_match.group(0)
167
+ model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
168
+ except AttributeError:
169
+ return 65
170
+
171
+ size_factor = 8 if (precision == "GPTQ" or "GPTQ" in model) else 1
172
+ model_size = size_factor * model_size
173
+
174
+ try:
175
+ license = model_info.cardData["license"]
176
+ except Exception:
177
+ license = "?"
178
+
179
+ # Were the model card and license filled?
180
+ modelcard_OK, error_msg = check_model_card(model)
181
+ if not modelcard_OK:
182
+ return styled_error(error_msg)
183
+
184
+ # Seems good, creating the eval
185
+ print("Adding new eval")
186
 
187
  eval_entry = {
188
  "model": model,
 
194
  "status": "PENDING",
195
  "submitted_time": current_time,
196
  "model_type": model_type,
197
+ "likes": model_info.likes,
198
+ "params": model_size,
199
+ "license": license,
200
  }
201
 
202
  user_name = ""
 
205
  user_name = model.split("/")[0]
206
  model_path = model.split("/")[1]
207
 
208
+ print("Creating eval file")
209
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
210
  os.makedirs(OUT_DIR, exist_ok=True)
211
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
212
 
 
 
 
 
213
  # Check for duplicate submission
214
  if f"{model}_{revision}_{precision}" in requested_models:
215
  return styled_warning("This model has been already submitted.")
 
217
  with open(out_path, "w") as f:
218
  f.write(json.dumps(eval_entry))
219
 
220
+ print("Uploading eval file")
221
  api.upload_file(
222
  path_or_fileobj=out_path,
223
  path_in_repo=out_path.split("eval-queue/")[1],
 
226
  commit_message=f"Add {model} to eval queue",
227
  )
228
 
229
+ # Remove the local file
230
  os.remove(out_path)
231
 
232
  return styled_message(
 
246
 
247
 
248
  # Searching and filtering
249
+ def update_table(
250
+ hidden_df: pd.DataFrame,
251
+ columns: list,
252
+ type_query: list,
253
+ precision_query: str,
254
+ size_query: list,
255
+ show_deleted: bool,
256
+ query: str,
257
+ ):
258
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
259
+ filtered_df = filter_queries(query, filtered_df)
 
260
  df = select_columns(filtered_df, columns)
 
261
  return df
262
 
263
+
264
  def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
265
  return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
266
 
267
+
268
  def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
269
  always_here_cols = [
270
  AutoEvalColumn.model_type_symbol.name,
 
276
  ]
277
  return filtered_df
278
 
279
+
280
  NUMERIC_INTERVALS = {
281
+ "?": pd.Interval(-1, 0, closed="right"),
282
+ "0~1.5": pd.Interval(0, 1.5, closed="right"),
283
+ "1.5~3": pd.Interval(1.5, 3, closed="right"),
284
+ "3~7": pd.Interval(3, 7, closed="right"),
285
+ "7~13": pd.Interval(7, 13, closed="right"),
286
+ "13~35": pd.Interval(13, 35, closed="right"),
287
+ "35~60": pd.Interval(35, 60, closed="right"),
288
+ "60+": pd.Interval(60, 10000, closed="right"),
289
  }
290
 
291
+
292
+ def filter_queries(query: str, filtered_df: pd.DataFrame):
293
+ """Added by Abishek"""
294
+ final_df = []
295
+ if query != "":
296
+ queries = [q.strip() for q in query.split(";")]
297
+ for _q in queries:
298
+ _q = _q.strip()
299
+ if _q != "":
300
+ temp_filtered_df = search_table(filtered_df, _q)
301
+ if len(temp_filtered_df) > 0:
302
+ final_df.append(temp_filtered_df)
303
+ if len(final_df) > 0:
304
+ filtered_df = pd.concat(final_df)
305
+ filtered_df = filtered_df.drop_duplicates(
306
+ subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
307
+ )
308
+
309
+ return filtered_df
310
+
311
+
312
  def filter_models(
313
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
314
  ) -> pd.DataFrame:
 
320
 
321
  type_emoji = [t[0] for t in type_query]
322
  filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
323
+ filtered_df = filtered_df[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
324
 
325
  numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
326
  params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
 
341
  with gr.Column():
342
  with gr.Row():
343
  search_bar = gr.Textbox(
344
+ placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
345
  show_label=False,
346
  elem_id="search-bar",
347
  )
 
386
  ModelType.FT.to_str(),
387
  ModelType.IFT.to_str(),
388
  ModelType.RL.to_str(),
389
+ ModelType.Unknown.to_str(),
390
  ],
391
  value=[
392
  ModelType.PT.to_str(),
393
  ModelType.FT.to_str(),
394
  ModelType.IFT.to_str(),
395
  ModelType.RL.to_str(),
396
+ ModelType.Unknown.to_str(),
397
  ],
398
  interactive=True,
399
  elem_id="filter-columns-type",
 
406
  elem_id="filter-columns-precision",
407
  )
408
  filter_columns_size = gr.CheckboxGroup(
409
+ label="Model sizes (in billions of parameters)",
410
  choices=list(NUMERIC_INTERVALS.keys()),
411
  value=list(NUMERIC_INTERVALS.keys()),
412
  interactive=True,
413
  elem_id="filter-columns-size",
414
  )
415
+
416
  leaderboard_table = gr.components.Dataframe(
417
  value=leaderboard_df[
418
  [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
 
444
  update_table,
445
  [
446
  hidden_leaderboard_table_for_search,
 
447
  shown_columns,
448
  filter_columns_type,
449
  filter_columns_precision,
 
457
  update_table,
458
  [
459
  hidden_leaderboard_table_for_search,
 
460
  shown_columns,
461
  filter_columns_type,
462
  filter_columns_precision,
 
471
  update_table,
472
  [
473
  hidden_leaderboard_table_for_search,
 
474
  shown_columns,
475
  filter_columns_type,
476
  filter_columns_precision,
 
485
  update_table,
486
  [
487
  hidden_leaderboard_table_for_search,
 
488
  shown_columns,
489
  filter_columns_type,
490
  filter_columns_precision,
 
499
  update_table,
500
  [
501
  hidden_leaderboard_table_for_search,
 
502
  shown_columns,
503
  filter_columns_type,
504
  filter_columns_precision,
 
513
  update_table,
514
  [
515
  hidden_leaderboard_table_for_search,
 
516
  shown_columns,
517
  filter_columns_type,
518
  filter_columns_precision,
 
523
  leaderboard_table,
524
  queue=True,
525
  )
526
+
527
+ with gr.TabItem("📈 Metrics evolution through time", elem_id="llm-benchmark-tab-table", id=4):
528
  with gr.Row():
529
  with gr.Column():
530
  chart = create_metric_plot_obj(
 
608
 
609
  with gr.Column():
610
  precision = gr.Dropdown(
611
+ choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ"],
 
 
 
 
 
 
612
  label="Precision",
613
  multiselect=False,
614
  value="float16",
 
644
  citation_button = gr.Textbox(
645
  value=CITATION_BUTTON_TEXT,
646
  label=CITATION_BUTTON_LABEL,
647
+ lines=20,
648
  elem_id="citation-button",
649
+ show_copy_button=True,
650
+ )
651
 
652
  dummy = gr.Textbox(visible=False)
653
  demo.load(