display button unification for benchmarks

#28
by zhiminy - opened
Files changed (2) hide show
  1. app.py +17 -4
  2. src/display/utils.py +10 -10
app.py CHANGED
@@ -89,6 +89,17 @@ def init_space():
89
  EVAL_REQUESTS_PATH, EVAL_COLS
90
  )
91
  return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # Searching and filtering
94
  def update_table(
@@ -96,7 +107,8 @@ def update_table(
96
  ):
97
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
98
  filtered_df = filter_queries(query, filtered_df)
99
- df = select_columns(filtered_df, columns)
 
100
  return df
101
 
102
 
@@ -270,18 +282,19 @@ with demo:
270
  # )
271
 
272
  # breakpoint()
273
-
274
  leaderboard_table = gr.components.Dataframe(
275
  value=(
276
  leaderboard_df[
277
  [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
278
  + shown_columns.value
 
279
  + [AutoEvalColumn.dummy.name]
280
  ]
281
  if leaderboard_df.empty is False
282
  else leaderboard_df
283
  ),
284
- headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
285
  datatype=TYPES,
286
  elem_id="leaderboard-table",
287
  interactive=False,
@@ -313,7 +326,7 @@ with demo:
313
  demo.load(load_query, inputs=[], outputs=[search_bar])
314
 
315
  for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
316
- selector.select(
317
  update_table,
318
  [
319
  hidden_leaderboard_table_for_search,
 
89
  EVAL_REQUESTS_PATH, EVAL_COLS
90
  )
91
  return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
92
+
93
+
94
# add_benchmark_columns(shown_columns): for every name in BENCHMARK_COLS that
# also appears in shown_columns, collect each entry of COLS that contains that
# name as a substring but is not equal to it, and return the collected entries
# as a list. Order follows BENCHMARK_COLS first, then COLS; the result is an
# empty list when no benchmark name is present in shown_columns.
# NOTE(review): presumably the matched entries are the per-benchmark system
# metric columns that src/display/utils.py names "<col_name> <metric>" - confirm.
# NOTE(review): the match is a plain substring test (`benchmark in c`), so a
# benchmark whose name is contained in an unrelated column name would pull that
# column in too, and a column matching several selected benchmarks is appended
# once per match - verify the column names make both cases impossible.
+ def add_benchmark_columns(shown_columns):
95
+ benchmark_columns = []
96
+ for benchmark in BENCHMARK_COLS:
97
+ if benchmark in shown_columns:
98
+ for c in COLS:
99
# Keep columns that mention the benchmark but skip the benchmark's own column.
+ if benchmark in c and benchmark != c:
100
+ benchmark_columns.append(c)
101
+ return benchmark_columns
102
+
103
 
104
  # Searching and filtering
105
  def update_table(
 
107
  ):
108
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
109
  filtered_df = filter_queries(query, filtered_df)
110
+ benchmark_columns = add_benchmark_columns(columns)
111
+ df = select_columns(filtered_df, columns + benchmark_columns)
112
  return df
113
 
114
 
 
282
  # )
283
 
284
  # breakpoint()
285
+ benchmark_columns = add_benchmark_columns(shown_columns.value)
286
  leaderboard_table = gr.components.Dataframe(
287
  value=(
288
  leaderboard_df[
289
  [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
290
  + shown_columns.value
291
+ + benchmark_columns
292
  + [AutoEvalColumn.dummy.name]
293
  ]
294
  if leaderboard_df.empty is False
295
  else leaderboard_df
296
  ),
297
+ headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value + benchmark_columns,
298
  datatype=TYPES,
299
  elem_id="leaderboard-table",
300
  interactive=False,
 
326
  demo.load(load_query, inputs=[], outputs=[search_bar])
327
 
328
  for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
329
+ selector.change(
330
  update_table,
331
  [
332
  hidden_leaderboard_table_for_search,
src/display/utils.py CHANGED
@@ -104,16 +104,16 @@ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnConten
104
  for task in Tasks:
105
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
106
  # System performance metrics
107
- auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
108
- auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
109
- # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
110
- auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
111
- auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
112
- auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
113
  if task.value.benchmark in MULTIPLE_CHOICEs:
114
  continue
115
- # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
116
- auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
117
 
118
 
119
  # Model information
@@ -242,8 +242,8 @@ class Precision(Enum):
242
 
243
 
244
  # Column selection
245
- COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
246
- TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
247
  COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
248
  TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
249
 
 
104
  # For each task, append to auto_eval_column_dict one entry for the task's own
  # column (named task.value.col_name) followed by entries for its per-task
  # columns: *_end_to_end_time, *_batch_size, *_gpu_mem, *_gpu, *_gpu_util and,
  # for tasks not in MULTIPLE_CHOICEs, *_decoding_throughput.
  # On this added ("+") side every per-task column is created with hidden=True;
  # the task's own column is not.
  # NOTE(review): the third positional argument of ColumnContent (True here) is
  # presumably displayed_by_default - confirm against the ColumnContent definition.
  for task in Tasks:
105
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
106
  # System performance metrics
107
+ auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True, hidden=True)])
108
+ auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True, hidden=True)])
109
+ # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True, hidden=True)])
110
+ auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True, hidden=True)])
111
+ auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True, hidden=True)])
112
+ auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True, hidden=True)])
113
  # Tasks whose benchmark is listed in MULTIPLE_CHOICEs stop here, so they never
  # get the decoding-throughput column appended below.
  if task.value.benchmark in MULTIPLE_CHOICEs:
114
  continue
115
+ # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
116
+ auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
117
 
118
 
119
  # Model information
 
242
 
243
 
244
  # Column selection
245
# On this added ("+") side COLS and TYPES are built from every field of
# AutoEvalColumn with no filter on c.hidden, while COLS_LITE / TYPES_LITE still
# keep only fields that are displayed_by_default and not hidden.
# NOTE(review): app.py's add_benchmark_columns scans COLS for per-benchmark
# columns, and this change creates those columns with hidden=True - presumably
# why the hidden filter was dropped here. Confirm that no other consumer of
# COLS / TYPES (e.g. the Dataframe `datatype=TYPES` argument) relied on hidden
# fields being excluded.
+ COLS = [c.name for c in fields(AutoEvalColumn)]
246
+ TYPES = [c.type for c in fields(AutoEvalColumn)]
247
  COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
248
  TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
249