zhiminy committed
Commit
4c3e664
1 Parent(s): d9ab9eb

add benchmark_columns

Files changed (2):
  1. app.py (+14 -2)
  2. src/display/utils.py (+10 -10)
app.py CHANGED

@@ -96,6 +96,7 @@ def update_table(
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
     filtered_df = filter_queries(query, filtered_df)
+    columns.extend(add_benchmark_columns(columns))
     df = select_columns(filtered_df, columns)
     return df
 
@@ -151,6 +152,16 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
     # filtered_df = filtered_df.loc[mask]
 
     return filtered_df
+
+
+def add_benchmark_columns(shown_columns):
+    benchmark_columns = []
+    for benchmark in BENCHMARK_COLS:
+        if benchmark in shown_columns:
+            for c in COLS:
+                if benchmark in c and benchmark != c:
+                    benchmark_columns.append(c)
+    return benchmark_columns
 
 shown_columns = None
 dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
@@ -270,18 +281,19 @@ with demo:
             # )
 
             # breakpoint()
-
+            benchmark_columns = add_benchmark_columns(shown_columns.value)
             leaderboard_table = gr.components.Dataframe(
                 value=(
                     leaderboard_df[
                         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                         + shown_columns.value
+                        + benchmark_columns
                         + [AutoEvalColumn.dummy.name]
                     ]
                     if leaderboard_df.empty is False
                     else leaderboard_df
                 ),
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value + benchmark_columns,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
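
For readers skimming the diff, here is a minimal, self-contained sketch of what the new add_benchmark_columns helper does: for each benchmark name the user has selected, it collects every other known column whose name contains that benchmark's name, i.e. the per-benchmark system metric columns defined in src/display/utils.py. The concrete values of BENCHMARK_COLS and COLS below are illustrative assumptions, not the Space's real column names.

# Sketch with hypothetical column names; in the repo BENCHMARK_COLS and COLS
# are built from AutoEvalColumn in src/display/utils.py.
BENCHMARK_COLS = ["GSM8K"]
COLS = [
    "Model",
    "GSM8K",
    "GSM8K End-to-end time (s)",
    "GSM8K Decoding throughput (tok/s)",
]

def add_benchmark_columns(shown_columns):
    benchmark_columns = []
    for benchmark in BENCHMARK_COLS:
        if benchmark in shown_columns:
            # collect every column belonging to this benchmark (name contains
            # the benchmark name) without re-adding the benchmark column itself
            for c in COLS:
                if benchmark in c and benchmark != c:
                    benchmark_columns.append(c)
    return benchmark_columns

print(add_benchmark_columns(["GSM8K"]))
# ['GSM8K End-to-end time (s)', 'GSM8K Decoding throughput (tok/s)']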
src/display/utils.py CHANGED

@@ -104,16 +104,16 @@ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnConten
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
-    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
-    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True, hidden=True)])
+    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True, hidden=True)])
     if task.value.benchmark in MULTIPLE_CHOICEs:
         continue
-    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
-    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
+    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
 
 
 # Model information
@@ -242,8 +242,8 @@ class Precision(Enum):
 
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
 COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 
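
The utils.py side of the change is easier to see with a small stand-in example (the ColumnContent below is a simplified stand-in, not the repo's class): marking the per-benchmark metric columns hidden=True keeps them out of the default column selection (COLS_LITE), while dropping the `not c.hidden` filter from COLS/TYPES keeps them addressable, so update_table and add_benchmark_columns can still pull them in when their parent benchmark is selected.

# Simplified stand-in (not the repo's ColumnContent/AutoEvalColumn) showing the
# effect of hidden=True together with the relaxed COLS definition.
from dataclasses import dataclass

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False

columns = [
    ColumnContent("Model", "markdown", True),
    ColumnContent("GSM8K", "number", True),                        # hypothetical benchmark column
    ColumnContent("GSM8K E2E (s)", "number", True, hidden=True),   # per-benchmark metric, now hidden
]

COLS = [c.name for c in columns]  # no longer filtered on `not c.hidden`
COLS_LITE = [c.name for c in columns if c.displayed_by_default and not c.hidden]

print(COLS)       # ['Model', 'GSM8K', 'GSM8K E2E (s)']
print(COLS_LITE)  # ['Model', 'GSM8K']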