baptistecolle (HF staff) committed
Commit 51a4daf • 1 Parent(s): e47d0b2

add support for additional backends (#33)


- add support for more backends (e37cee2506e5bdffbf68513d4e57d076c2c64c6f)
- remove unnecessary warnings (0732974d483476a9eb15d1367abcb8f17069f57d)

app.py CHANGED

```diff
@@ -1,5 +1,6 @@
 import gradio as gr
 
+import src.dependency  # noqa
 from src.assets import custom_css
 
 # from src.attention import create_attn_plots
@@ -14,10 +15,13 @@ from src.panel import (
     create_select_callback,
 )
 
-configs = load_hardware_configs("hardware.yml")
+configs = load_hardware_configs("hardware.yaml")
 
 
-demo = gr.Blocks(css=custom_css)
+demo = gr.Blocks(
+    css=custom_css,
+    theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
+)
 with demo:
     gr.HTML(LOGO, elem_classes="logo")
     gr.HTML(TITLE, elem_classes="title")
@@ -29,13 +33,13 @@ with demo:
         if config.detail:
             gr.Markdown(config.detail, elem_classes="descriptive-text")
 
-
-
         # ####################### CONTROL PANEL #######################
         (
            filter_button,
-           machine_textbox,
-           subsets_values,
+           machine_value,
+           subsets_value,
+           backends_value,
+           hardware_type_value,
            score_slider,
            memory_slider,
            backend_checkboxes,
@@ -43,18 +47,33 @@ with demo:
            optimization_checkboxes,
            quantization_checkboxes,
            kernels_checkboxes,
-        ) = create_control_panel(machine=config.machine, subsets=config.subsets, hardware_provider=config.hardware_provider)
+        ) = create_control_panel(
+            machine=config.machine,
+            subsets=config.subsets,
+            backends=config.backends,
+            hardware_type=config.hardware_type,
+            hardware_provider=config.hardware_provider,
+        )
         ####################### HARDWARE SUBTABS #######################
         with gr.Tabs(elem_classes="subtabs"):
-            open_llm_perf_df = get_llm_perf_df(machine=config.machine, subsets=config.subsets)
+            open_llm_perf_df = get_llm_perf_df(
+                machine=config.machine,
+                subsets=config.subsets,
+                backends=config.backends,
+                hardware_type=config.hardware_type,
+            )
             ####################### LEADERBOARD TAB #######################
             with gr.TabItem("Leaderboard 🏅", id=0):
                 search_bar, columns_checkboxes, leaderboard_table = (
                     create_leaderboard_table(open_llm_perf_df)
                 )
-            if config.hardware_provider != "intel":  # TODO intel CPU does not measure the memory requirements correctly, so disable the graph feature until we fix the underlying issue
+            if (
+                config.hardware_provider != "intel"
+            ):  # TODO intel CPU does not measure the memory requirements correctly, so disable the graph feature until we fix the underlying issue
                 with gr.TabItem("Find Your Best Model 🧭", id=1):
-                    lat_score_mem_plot = create_lat_score_mem_plot(open_llm_perf_df)
+                    lat_score_mem_plot = create_lat_score_mem_plot(
+                        open_llm_perf_df
+                    )
             ###################### ATTENTIONS SPEEDUP TAB #######################
             # with gr.TabItem("Attention 📈", id=2):
             #     attn_prefill_plot, attn_decode_plot = create_attn_plots(
@@ -70,8 +89,10 @@ with demo:
         create_control_callback(
             filter_button,
             # inputs
-            machine_textbox,
-            subsets_values,
+            machine_value,
+            subsets_value,
+            backends_value,
+            hardware_type_value,
             score_slider,
             memory_slider,
             backend_checkboxes,
@@ -93,8 +114,10 @@ with demo:
 
         create_select_callback(
             # inputs
-            machine_textbox,
-            subsets_values,
+            machine_value,
+            subsets_value,
+            backends_value,
+            hardware_type_value,
             # interactive
             columns_checkboxes,
             search_bar,
```
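
One detail worth isolating from the diff above: `gr.Blocks` now takes a `theme` in addition to `css`. A minimal sketch of just that change, runnable on its own (the CSS string here is a stand-in for the real `src.assets.custom_css`):

```python
import gradio as gr

demo = gr.Blocks(
    css=".logo { height: 50px; }",  # stand-in for src.assets.custom_css
    theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
)

with demo:
    gr.Markdown("Theme smoke test")

if __name__ == "__main__":
    demo.launch()
```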
hardware.yml → hardware.yaml RENAMED

```diff
@@ -1,7 +1,7 @@
 - machine: 1xA10
   description: A10-24GB-150W 🖥️
   hardware_provider: nvidia
-  hardware_type: gpu
+  hardware_type: cuda
   subsets:
   - unquantized
   - awq
@@ -13,7 +13,7 @@
 - machine: 1xA100
   description: A100-80GB-275W 🖥️
   hardware_provider: nvidia
-  hardware_type: gpu
+  hardware_type: cuda
   subsets:
   - unquantized
   - awq
@@ -25,7 +25,7 @@
 - machine: 1xT4
   description: T4-16GB-70W 🖥️
   hardware_provider: nvidia
-  hardware_type: gpu
+  hardware_type: cuda
   subsets:
   - unquantized
   - awq
@@ -43,4 +43,6 @@
   subsets:
   - unquantized
   backends:
-  - pytorch
+  - pytorch
+  - openvino
+  - onnxruntime
```
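
The rename to `hardware.yaml` matches the new path loaded in app.py, and `hardware_type` switches from the generic `gpu` to `cuda`, presumably to match the device names the benchmark backends use. A quick check that an entry parses into the fields `HardwareConfig` reads (entry reproduced from the hunk above, truncated to the visible fields):

```python
import yaml

entry = yaml.safe_load("""
- machine: 1xA10
  description: A10-24GB-150W 🖥️
  hardware_provider: nvidia
  hardware_type: cuda
  subsets:
  - unquantized
  - awq
""")[0]

assert entry["hardware_type"] == "cuda"  # was "gpu" before this commit
```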
requirements.txt CHANGED

```diff
@@ -1,5 +1,6 @@
 huggingface_hub
 transformers
-gradio
+gradio>=5.0.0
 plotly
-pandas
+pandas
+ruff
```
src/content.py CHANGED

(The `-`/`+` pairs below carry identical visible text; the change is whitespace-only.)

```diff
@@ -5,18 +5,18 @@ TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️
 ABOUT = """
 ## 📝 About
 The 🤗 LLM-Perf Leaderboard 🏋️ is a laderboard at the intersection of quality and performance.
-Its aim is to benchmark the performance (latency, throughput, memory & energy)
-of Large Language Models (LLMs) with different hardwares, backends and optimizations
+Its aim is to benchmark the performance (latency, throughput, memory & energy)
+of Large Language Models (LLMs) with different hardwares, backends and optimizations
 using [Optimum-Benhcmark](https://github.com/huggingface/optimum-benchmark).
 
-Anyone from the community can request a new base model or hardware/backend/optimization
+Anyone from the community can request a new base model or hardware/backend/optimization
 configuration for automated benchmarking:
 
-- Model evaluation requests should be made in the
+- Model evaluation requests should be made in the
   [🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
   we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
-- Hardware/Backend/Optimization configuration requests should be made in the
-  [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
+- Hardware/Backend/Optimization configuration requests should be made in the
+  [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
   [Optimum-Benhcmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
 
 ## ✍️ Details
```
src/dependency.py ADDED

```diff
@@ -0,0 +1,3 @@
+import os
+
+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
```
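
This module exists only for its side effect: app.py imports it first (hence the `# noqa` on the otherwise-unused import) so the variable is already set before any transformers code emits an advisory warning. A minimal sketch of the same pattern in isolation:

```python
import os

# Set before any transformers code runs, so every later
# advisory-warning check sees the variable.
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"

import transformers  # noqa: E402  # advisory warnings are suppressed from here on
```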
src/hardware.py CHANGED

```diff
@@ -1,19 +1,19 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 import yaml
 
 
 class HardwareConfig:
     def __init__(self, data: Dict[str, Any]):
-        self.machine = data["machine"]
-        self.description = data["description"]
-        self.hardware_provider = data["hardware_provider"]
-        self.hardware_type = data["hardware_type"]
-        self.subsets = data["subsets"]
-        self.backends = data["backends"]
-        self.detail = data.get("detail", None)
+        self.machine: str = data["machine"]
+        self.description: str = data["description"]
+        self.hardware_provider: str = data["hardware_provider"]
+        self.hardware_type: str = data["hardware_type"]
+        self.subsets: List[str] = data["subsets"]
+        self.backends: List[str] = data["backends"]
+        self.detail: Optional[str] = data.get("detail", None)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return (
             f"HardwareConfig(machine='{self.machine}', description='{self.description}', "
             f"hardware_provider={self.hardware_provider}, hardware_type={self.hardware_type}, subsets={self.subsets}, backends={self.backends})"
```
src/llm_perf.py CHANGED

```diff
@@ -3,8 +3,6 @@ from typing import List
 
 import pandas as pd
 
-from src.hardware import HardwareConfig
-
 from .utils import process_kernels, process_quantizations
 
 DATASET_DIRECTORY = "dataset"
@@ -34,17 +32,31 @@ SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
 SORTING_ASCENDING = [False, True, False]
 
 
-def get_raw_llm_perf_df(machine: str, subsets: List[str]):
+def get_raw_llm_perf_df(
+    machine: str, subsets: List[str], backends: List[str], hardware_type: str
+):
     dfs = []
     for subset in subsets:
-        try:
-            dfs.append(
-                pd.read_csv(
-                    f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{subset}-{machine}.csv"
-                )
-            )
-        except Exception:
-            print(f"Subset {subset} for machine {machine} not found")
+        for backend in backends:
+            try:
+                dfs.append(
+                    pd.read_csv(
+                        f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
+                    )
+                )
+            except Exception:
+                print("Dataset not found for:")
+                print(f" • Backend: {backend}")
+                print(f" • Subset: {subset}")
+                print(f" • Machine: {machine}")
+                print(f" • Hardware Type: {hardware_type}")
+                url = f"https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard/blob/main/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
+                print(f" • URL: {url}")
+
+    if len(dfs) == 0:
+        raise ValueError(
+            f"No datasets found for machine {machine}, check your hardware.yml config file or your datatset on huggingface"
+        )
 
     perf_df = pd.concat(dfs)
     llm_df = pd.read_csv(
@@ -112,15 +124,22 @@ def processed_llm_perf_df(llm_perf_df):
     return llm_perf_df
 
 
-def get_llm_perf_df(machine: str, subsets: List[str]):
+def get_llm_perf_df(
+    machine: str, subsets: List[str], backends: List[str], hardware_type: str
+):
     if not os.path.exists(DATASET_DIRECTORY):
         os.makedirs(DATASET_DIRECTORY)
 
     if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
-        llm_perf_df = pd.read_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv")
+        llm_perf_df = pd.read_csv(
+            f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"
+        )
     else:
-        llm_perf_df = get_raw_llm_perf_df(machine, subsets)
+        print(f"Dataset machine {machine} not found, downloading...")
+        llm_perf_df = get_raw_llm_perf_df(machine, subsets, backends, hardware_type)
         llm_perf_df = processed_llm_perf_df(llm_perf_df)
-        llm_perf_df.to_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False)
+        llm_perf_df.to_csv(
+            f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False
+        )
 
     return llm_perf_df
```
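
The dataset filename scheme gains two dimensions: the old pattern was `perf-df-{subset}-{machine}.csv`, the new one is `perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv`. For the `1xA10` entry in hardware.yaml, the loader would now probe paths like these (illustrative combinations, not an exhaustive list):

```python
machine, hardware_type = "1xA10", "cuda"
for backend in ["pytorch"]:
    for subset in ["unquantized", "awq"]:
        print(
            "hf://datasets/optimum-benchmark/llm-perf-leaderboard/"
            f"perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
        )
```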
src/panel.py CHANGED

```diff
@@ -10,17 +10,30 @@ from src.llm_perf import get_llm_perf_df
 from src.map import get_lat_score_mem_fig
 
 
-def create_control_panel(machine: str, subsets: List[str], hardware_provider: str):
+def create_control_panel(
+    machine: str,
+    subsets: List[str],
+    backends: List[str],
+    hardware_provider: str,
+    hardware_type: str,
+):
     # controls
-    machine_textbox = gr.Textbox(value=machine, visible=False)
-    subsets_values = gr.State(value=subsets)
-
+    machine_value = gr.State(value=machine)
+    subsets_value = gr.State(value=subsets)
+    backends_value = gr.State(value=backends)
+    hardware_type_value = gr.State(value=hardware_type)
 
     if hardware_provider == "nvidia":
         backends = ["pytorch"]
         attention_implementations = ["Eager", "SDPA", "FAv2"]
         quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
-        kernels = ["No Kernel", "GPTQ.ExllamaV1", "GPTQ.ExllamaV2", "AWQ.GEMM", "AWQ.GEMV"]
+        kernels = [
+            "No Kernel",
+            "GPTQ.ExllamaV1",
+            "GPTQ.ExllamaV2",
+            "AWQ.GEMM",
+            "AWQ.GEMV",
+        ]
     elif hardware_provider == "intel":
         backends = ["pytorch", "onnxruntime", "openvino"]
         attention_implementations = ["Eager"]
@@ -29,7 +42,6 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
     else:
         raise ValueError(f"Unknown hardware provider: {hardware_provider}")
 
-
     with gr.Accordion("Control Panel 🎛️", open=False, elem_id="control-panel"):
         with gr.Row():
             with gr.Column(scale=2, variant="panel"):
@@ -101,8 +113,10 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
 
     return (
         filter_button,
-        machine_textbox,
-        subsets_values,
+        machine_value,
+        backends_value,
+        hardware_type_value,
+        subsets_value,
         score_slider,
         memory_slider,
         backend_checkboxes,
@@ -116,10 +130,12 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
 def filter_rows_fn(
     machine,
     subsets,
+    backends,
+    hardware_type,
     # inputs
     score,
     memory,
-    backends,
+    backend_checkboxes,
     precisions,
     attentions,
     quantizations,
@@ -128,12 +144,14 @@ def filter_rows_fn(
     columns,
     search,
 ):
-    llm_perf_df = get_llm_perf_df(machine=machine, subsets=subsets)
+    llm_perf_df = get_llm_perf_df(
+        machine=machine, subsets=subsets, backends=backends, hardware_type=hardware_type
+    )
     # print(attentions)
     # print(llm_perf_df["Attention 👁️"].unique())
     filtered_llm_perf_df = llm_perf_df[
         llm_perf_df["Model 🤗"].str.contains(search, case=False)
-        & llm_perf_df["Backend 🏭"].isin(backends)
+        & llm_perf_df["Backend 🏭"].isin(backend_checkboxes)
         & llm_perf_df["Precision 📥"].isin(precisions)
         & llm_perf_df["Attention 👁️"].isin(attentions)
         & llm_perf_df["Quantization 🗜️"].isin(quantizations)
@@ -142,7 +160,7 @@ def filter_rows_fn(
         & (llm_perf_df["Memory (MB)"] <= memory)
     ]
     selected_filtered_llm_perf_df = select_columns_fn(
-        machine, subsets, columns, search, filtered_llm_perf_df
+        machine, subsets, backends, hardware_type, columns, search, filtered_llm_perf_df
     )
     selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
     # filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
@@ -168,8 +186,10 @@ def create_control_callback(
     # button
     filter_button,
     # fixed
-    machine_textbox,
-    subsets_textbox,
+    machine_value,
+    subsets_value,
+    backends_value,
+    hardware_type_value,
     # inputs
     score_slider,
     memory_slider,
@@ -195,8 +215,10 @@ def create_control_callback(
         fn=filter_rows_fn,
         inputs=[
             # fixed
-            machine_textbox,
-            subsets_textbox,
+            machine_value,
+            subsets_value,
+            backends_value,
+            hardware_type_value,
             # inputs
             score_slider,
             memory_slider,
@@ -222,9 +244,16 @@ def create_control_callback(
     )
 
 
-def select_columns_fn(machine, subsets, columns, search, llm_perf_df=None):
+def select_columns_fn(
+    machine, subsets, backends, hardware_type, columns, search, llm_perf_df=None
+):
     if llm_perf_df is None:
-        llm_perf_df = get_llm_perf_df(machine=machine, subsets=subsets)
+        llm_perf_df = get_llm_perf_df(
+            machine=machine,
+            subsets=subsets,
+            backends=backends,
+            hardware_type=hardware_type,
+        )
 
     selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
     selected_leaderboard_df = selected_leaderboard_df[
@@ -237,8 +266,10 @@ def select_columns_fn(machine, subsets, columns, search, llm_perf_df=None):
 
 def create_select_callback(
     # fixed
-    machine_textbox,
-    subsets_values,
+    machine_value,
+    subsets_value,
+    backends_value,
+    hardware_type_value,
     # interactive
     columns_checkboxes,
     search_bar,
@@ -247,11 +278,25 @@ def create_select_callback(
 ):
     columns_checkboxes.change(
         fn=select_columns_fn,
-        inputs=[machine_textbox, subsets_values, columns_checkboxes, search_bar],
+        inputs=[
+            machine_value,
+            subsets_value,
+            backends_value,
+            hardware_type_value,
+            columns_checkboxes,
+            search_bar,
+        ],
        outputs=[leaderboard_table],
     )
     search_bar.change(
        fn=select_columns_fn,
-        inputs=[machine_textbox, subsets_values, columns_checkboxes, search_bar],
+        inputs=[
+            machine_value,
+            subsets_value,
+            backends_value,
+            hardware_type_value,
+            columns_checkboxes,
+            search_bar,
+        ],
        outputs=[leaderboard_table],
     )
```
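
Replacing the hidden `gr.Textbox` with `gr.State` is more than a rename: a Textbox can only carry a string, so list-valued inputs such as `subsets` or `backends` would need ad-hoc serialization, while `gr.State` passes arbitrary Python objects into callbacks unchanged. A minimal self-contained sketch of the pattern (component names here are illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    # Holds a real Python list, invisible to the user, handed to callbacks as-is.
    backends_value = gr.State(value=["pytorch", "openvino", "onnxruntime"])
    button = gr.Button("Show backends")
    output = gr.Textbox()
    button.click(
        fn=lambda backends: ", ".join(backends),
        inputs=[backends_value],
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch()
```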