Add Intel CPU to leaderboard

#32
by baptistecolle HF staff - opened
Files changed (6) hide show
  1. .gitignore +2 -0
  2. app.py +20 -16
  3. hardware.yml +46 -0
  4. src/hardware.py +26 -0
  5. src/llm_perf.py +7 -5
  6. src/panel.py +39 -39
.gitignore CHANGED
@@ -4,5 +4,7 @@ __pycache__/
4
  *ipynb
5
  .vscode/
6
 
 
 
7
  dataset/
8
  .venv
 
4
  *ipynb
5
  .vscode/
6
 
7
+ work-in-progress/
8
+
9
  dataset/
10
  .venv
app.py CHANGED
@@ -4,6 +4,7 @@ from src.assets import custom_css
4
 
5
  # from src.attention import create_attn_plots
6
  from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, TITLE
 
7
  from src.leaderboard import create_leaderboard_table
8
  from src.llm_perf import get_llm_perf_df
9
  from src.map import create_lat_score_mem_plot
@@ -13,14 +14,7 @@ from src.panel import (
13
  create_select_callback,
14
  )
15
 
16
- # from custom_kernels import create_quant_krnl_plots
17
-
18
- MACHINE_TO_HARDWARE = {
19
- "1xA10": "A10-24GB-150W 🖥️",
20
- "1xA100": "A100-80GB-275W 🖥️",
21
- "1xT4": "T4-16GB-70W 🖥️",
22
- # "1xH100": "H100-80GB-700W 🖥️",
23
- }
24
 
25
 
26
  demo = gr.Blocks(css=custom_css)
@@ -29,12 +23,19 @@ with demo:
29
  gr.HTML(TITLE, elem_classes="title")
30
  ####################### HARDWARE TABS #######################
31
  with gr.Tabs(elem_classes="tabs"):
32
- for id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
33
- with gr.TabItem(hardware, id=id):
34
- ####################### CONTROL PANEL #######################
 
 
 
 
 
 
35
  (
36
  filter_button,
37
  machine_textbox,
 
38
  score_slider,
39
  memory_slider,
40
  backend_checkboxes,
@@ -42,17 +43,18 @@ with demo:
42
  optimization_checkboxes,
43
  quantization_checkboxes,
44
  kernels_checkboxes,
45
- ) = create_control_panel(machine=machine)
46
  ####################### HARDWARE SUBTABS #######################
47
  with gr.Tabs(elem_classes="subtabs"):
48
- open_llm_perf_df = get_llm_perf_df(machine=machine)
49
  ####################### LEADERBOARD TAB #######################
50
  with gr.TabItem("Leaderboard 🏅", id=0):
51
  search_bar, columns_checkboxes, leaderboard_table = (
52
  create_leaderboard_table(open_llm_perf_df)
53
  )
54
- with gr.TabItem("Find Your Best Model 🧭", id=1):
55
- lat_score_mem_plot = create_lat_score_mem_plot(open_llm_perf_df)
 
56
  ###################### ATTENTIONS SPEEDUP TAB #######################
57
  # with gr.TabItem("Attention 📈", id=2):
58
  # attn_prefill_plot, attn_decode_plot = create_attn_plots(
@@ -69,6 +71,7 @@ with demo:
69
  filter_button,
70
  # inputs
71
  machine_textbox,
 
72
  score_slider,
73
  memory_slider,
74
  backend_checkboxes,
@@ -91,6 +94,7 @@ with demo:
91
  create_select_callback(
92
  # inputs
93
  machine_textbox,
 
94
  # interactive
95
  columns_checkboxes,
96
  search_bar,
@@ -99,7 +103,7 @@ with demo:
99
  )
100
 
101
  ####################### ABOUT TAB #######################
102
- with gr.TabItem("About 📖", id=3):
103
  gr.Markdown(ABOUT, elem_classes="descriptive-text")
104
  ####################### CITATION
105
  with gr.Row():
 
4
 
5
  # from src.attention import create_attn_plots
6
  from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, TITLE
7
+ from src.hardware import load_hardware_configs
8
  from src.leaderboard import create_leaderboard_table
9
  from src.llm_perf import get_llm_perf_df
10
  from src.map import create_lat_score_mem_plot
 
14
  create_select_callback,
15
  )
16
 
17
+ configs = load_hardware_configs("hardware.yml")
 
 
 
 
 
 
 
18
 
19
 
20
  demo = gr.Blocks(css=custom_css)
 
23
  gr.HTML(TITLE, elem_classes="title")
24
  ####################### HARDWARE TABS #######################
25
  with gr.Tabs(elem_classes="tabs"):
26
+ for id, config in enumerate(configs):
27
+ with gr.TabItem(config.description, id=id):
28
+ ####################### HARDWARE DETAILS #######################
29
+ if config.detail:
30
+ gr.Markdown(config.detail, elem_classes="descriptive-text")
31
+
32
+
33
+
34
+ # ####################### CONTROL PANEL #######################
35
  (
36
  filter_button,
37
  machine_textbox,
38
+ subsets_values,
39
  score_slider,
40
  memory_slider,
41
  backend_checkboxes,
 
43
  optimization_checkboxes,
44
  quantization_checkboxes,
45
  kernels_checkboxes,
46
+ ) = create_control_panel(machine=config.machine, subsets=config.subsets, hardware_provider=config.hardware_provider)
47
  ####################### HARDWARE SUBTABS #######################
48
  with gr.Tabs(elem_classes="subtabs"):
49
+ open_llm_perf_df = get_llm_perf_df(machine=config.machine, subsets=config.subsets)
50
  ####################### LEADERBOARD TAB #######################
51
  with gr.TabItem("Leaderboard 🏅", id=0):
52
  search_bar, columns_checkboxes, leaderboard_table = (
53
  create_leaderboard_table(open_llm_perf_df)
54
  )
55
+ if config.hardware_provider != "intel": # TODO intel CPU does not measure the memory requirements correctly, so disable the graph feature until we fix the underlying issue
56
+ with gr.TabItem("Find Your Best Model 🧭", id=1):
57
+ lat_score_mem_plot = create_lat_score_mem_plot(open_llm_perf_df)
58
  ###################### ATTENTIONS SPEEDUP TAB #######################
59
  # with gr.TabItem("Attention 📈", id=2):
60
  # attn_prefill_plot, attn_decode_plot = create_attn_plots(
 
71
  filter_button,
72
  # inputs
73
  machine_textbox,
74
+ subsets_values,
75
  score_slider,
76
  memory_slider,
77
  backend_checkboxes,
 
94
  create_select_callback(
95
  # inputs
96
  machine_textbox,
97
+ subsets_values,
98
  # interactive
99
  columns_checkboxes,
100
  search_bar,
 
103
  )
104
 
105
  ####################### ABOUT TAB #######################
106
+ with gr.TabItem("About 📖", id=len(configs)):
107
  gr.Markdown(ABOUT, elem_classes="descriptive-text")
108
  ####################### CITATION
109
  with gr.Row():
hardware.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - machine: 1xA10
2
+ description: A10-24GB-150W 🖥️
3
+ hardware_provider: nvidia
4
+ hardware_type: gpu
5
+ subsets:
6
+ - unquantized
7
+ - awq
8
+ - bnb
9
+ - gptq
10
+ backends:
11
+ - pytorch
12
+
13
+ - machine: 1xA100
14
+ description: A100-80GB-275W 🖥️
15
+ hardware_provider: nvidia
16
+ hardware_type: gpu
17
+ subsets:
18
+ - unquantized
19
+ - awq
20
+ - bnb
21
+ - gptq
22
+ backends:
23
+ - pytorch
24
+
25
+ - machine: 1xT4
26
+ description: T4-16GB-70W 🖥️
27
+ hardware_provider: nvidia
28
+ hardware_type: gpu
29
+ subsets:
30
+ - unquantized
31
+ - awq
32
+ - bnb
33
+ - gptq
34
+ backends:
35
+ - pytorch
36
+
37
+ - machine: 32vCPU-C7i
38
+ description: Intel-Xeon-SPR-385W 🖥️
39
+ detail: |
40
+ We tested the [32vCPU AWS C7i](https://aws.amazon.com/ec2/instance-types/c7i/) instance for the benchmark.
41
+ hardware_provider: intel
42
+ hardware_type: cpu
43
+ subsets:
44
+ - unquantized
45
+ backends:
46
+ - pytorch
src/hardware.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List
2
+
3
+ import yaml
4
+
5
+
6
class HardwareConfig:
    """Settings for a single hardware entry parsed from the hardware YAML file.

    Attributes:
        machine: Machine identifier (e.g. ``"1xA10"``).
        description: Human-readable label for the hardware.
        hardware_provider: Vendor name (e.g. ``"nvidia"`` or ``"intel"``).
        hardware_type: Kind of device (e.g. ``"gpu"`` or ``"cpu"``).
        subsets: Names of the benchmark subsets available for this machine.
        backends: Names of the backends listed for this machine.
        detail: Optional free-form text about the hardware; ``None`` when the
            entry does not define it.
    """

    # Every entry must define these keys; "detail" is the only optional one.
    _REQUIRED_KEYS = (
        "machine",
        "description",
        "hardware_provider",
        "hardware_type",
        "subsets",
        "backends",
    )

    def __init__(self, data: Dict[str, Any]):
        """Build a config from one mapping of the YAML list.

        Args:
            data: Mapping holding the keys in ``_REQUIRED_KEYS`` and,
                optionally, ``"detail"``.

        Raises:
            KeyError: If one or more required keys are absent. The message
                lists every missing key rather than only the first one hit.
        """
        missing = [key for key in self._REQUIRED_KEYS if key not in data]
        if missing:
            raise KeyError(
                f"hardware config is missing required key(s): {', '.join(missing)}"
            )

        self.machine = data["machine"]
        self.description = data["description"]
        self.hardware_provider = data["hardware_provider"]
        self.hardware_type = data["hardware_type"]
        self.subsets = data["subsets"]
        self.backends = data["backends"]
        self.detail = data.get("detail")

    def __repr__(self):
        # Use !r on every field so strings are quoted/escaped consistently,
        # and include `detail` so the repr reflects the full state.
        return (
            f"HardwareConfig(machine={self.machine!r}, description={self.description!r}, "
            f"hardware_provider={self.hardware_provider!r}, hardware_type={self.hardware_type!r}, "
            f"subsets={self.subsets!r}, backends={self.backends!r}, detail={self.detail!r})"
        )
21
+
22
+
23
def load_hardware_configs(file_path: str) -> List[HardwareConfig]:
    """Load the hardware entries listed in a YAML file.

    Args:
        file_path: Path to a YAML document whose top level is a list of
            mappings, one per hardware entry.

    Returns:
        One ``HardwareConfig`` per entry, in file order. An empty document
        yields an empty list.
    """
    # Decode explicitly as UTF-8: entries may contain non-ASCII text such as
    # emoji, and the platform default encoding is not guaranteed to handle it.
    with open(file_path, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    # safe_load returns None for an empty document; treat that as "no entries"
    # instead of failing with a TypeError when iterating.
    if data is None:
        return []
    return [HardwareConfig(config) for config in data]
src/llm_perf.py CHANGED
@@ -1,7 +1,10 @@
1
  import os
 
2
 
3
  import pandas as pd
4
 
 
 
5
  from .utils import process_kernels, process_quantizations
6
 
7
  DATASET_DIRECTORY = "dataset"
@@ -28,13 +31,12 @@ COLUMNS_MAPPING = {
28
  "#Params (B)": "Params (B)",
29
  }
30
  SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
31
- SUBSETS = ["unquantized", "awq", "bnb", "gptq"]
32
  SORTING_ASCENDING = [False, True, False]
33
 
34
 
35
- def get_raw_llm_perf_df(machine: str = "1xA10"):
36
  dfs = []
37
- for subset in SUBSETS:
38
  try:
39
  dfs.append(
40
  pd.read_csv(
@@ -110,14 +112,14 @@ def processed_llm_perf_df(llm_perf_df):
110
  return llm_perf_df
111
 
112
 
113
- def get_llm_perf_df(machine: str = "1xA10"):
114
  if not os.path.exists(DATASET_DIRECTORY):
115
  os.makedirs(DATASET_DIRECTORY)
116
 
117
  if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
118
  llm_perf_df = pd.read_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv")
119
  else:
120
- llm_perf_df = get_raw_llm_perf_df(machine)
121
  llm_perf_df = processed_llm_perf_df(llm_perf_df)
122
  llm_perf_df.to_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False)
123
 
 
1
  import os
2
+ from typing import List
3
 
4
  import pandas as pd
5
 
6
+ from src.hardware import HardwareConfig
7
+
8
  from .utils import process_kernels, process_quantizations
9
 
10
  DATASET_DIRECTORY = "dataset"
 
31
  "#Params (B)": "Params (B)",
32
  }
33
  SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
 
34
  SORTING_ASCENDING = [False, True, False]
35
 
36
 
37
+ def get_raw_llm_perf_df(machine: str, subsets: List[str]):
38
  dfs = []
39
+ for subset in subsets:
40
  try:
41
  dfs.append(
42
  pd.read_csv(
 
112
  return llm_perf_df
113
 
114
 
115
+ def get_llm_perf_df(machine: str, subsets: List[str]):
116
  if not os.path.exists(DATASET_DIRECTORY):
117
  os.makedirs(DATASET_DIRECTORY)
118
 
119
  if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
120
  llm_perf_df = pd.read_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv")
121
  else:
122
+ llm_perf_df = get_raw_llm_perf_df(machine, subsets)
123
  llm_perf_df = processed_llm_perf_df(llm_perf_df)
124
  llm_perf_df.to_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False)
125
 
src/panel.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
 
3
  from src.leaderboard import get_leaderboard_df
@@ -8,9 +10,26 @@ from src.llm_perf import get_llm_perf_df
8
  from src.map import get_lat_score_mem_fig
9
 
10
 
11
- def create_control_panel(machine: str):
12
  # controls
13
  machine_textbox = gr.Textbox(value=machine, visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  with gr.Accordion("Control Panel 🎛️", open=False, elem_id="control-panel"):
15
  with gr.Row():
16
  with gr.Column(scale=2, variant="panel"):
@@ -32,8 +51,8 @@ def create_control_panel(machine: str):
32
  with gr.Column(scale=1, variant="panel"):
33
  backend_checkboxes = gr.CheckboxGroup(
34
  label="Backends 🏭",
35
- choices=["pytorch"],
36
- value=["pytorch"],
37
  info="☑️ Select the backends",
38
  elem_id="backend-checkboxes",
39
  )
@@ -49,8 +68,8 @@ def create_control_panel(machine: str):
49
  with gr.Column(scale=1, variant="panel"):
50
  optimization_checkboxes = gr.CheckboxGroup(
51
  label="Attentions 👁️",
52
- choices=["Eager", "SDPA", "FAv2"],
53
- value=["Eager", "SDPA", "FAv2"],
54
  info="☑️ Select the optimization",
55
  elem_id="optimization-checkboxes",
56
  )
@@ -58,20 +77,8 @@ def create_control_panel(machine: str):
58
  with gr.Column(scale=1, variant="panel"):
59
  quantization_checkboxes = gr.CheckboxGroup(
60
  label="Quantizations 🗜️",
61
- choices=[
62
- "Unquantized",
63
- "BnB.4bit",
64
- "BnB.8bit",
65
- "AWQ.4bit",
66
- "GPTQ.4bit",
67
- ],
68
- value=[
69
- "Unquantized",
70
- "BnB.4bit",
71
- "BnB.8bit",
72
- "AWQ.4bit",
73
- "GPTQ.4bit",
74
- ],
75
  info="☑️ Select the quantization schemes",
76
  elem_id="quantization-checkboxes",
77
  elem_classes="boxed-option",
@@ -79,20 +86,8 @@ def create_control_panel(machine: str):
79
  with gr.Column(scale=1, variant="panel"):
80
  kernels_checkboxes = gr.CheckboxGroup(
81
  label="Kernels ⚛️",
82
- choices=[
83
- "No Kernel",
84
- "GPTQ.ExllamaV1",
85
- "GPTQ.ExllamaV2",
86
- "AWQ.GEMM",
87
- "AWQ.GEMV",
88
- ],
89
- value=[
90
- "No Kernel",
91
- "GPTQ.ExllamaV1",
92
- "GPTQ.ExllamaV2",
93
- "AWQ.GEMM",
94
- "AWQ.GEMV",
95
- ],
96
  info="☑️ Select the custom kernels",
97
  elem_id="kernel-checkboxes",
98
  elem_classes="boxed-option",
@@ -107,6 +102,7 @@ def create_control_panel(machine: str):
107
  return (
108
  filter_button,
109
  machine_textbox,
 
110
  score_slider,
111
  memory_slider,
112
  backend_checkboxes,
@@ -119,6 +115,7 @@ def create_control_panel(machine: str):
119
 
120
  def filter_rows_fn(
121
  machine,
 
122
  # inputs
123
  score,
124
  memory,
@@ -131,7 +128,7 @@ def filter_rows_fn(
131
  columns,
132
  search,
133
  ):
134
- llm_perf_df = get_llm_perf_df(machine=machine)
135
  # print(attentions)
136
  # print(llm_perf_df["Attention 👁️"].unique())
137
  filtered_llm_perf_df = llm_perf_df[
@@ -145,7 +142,7 @@ def filter_rows_fn(
145
  & (llm_perf_df["Memory (MB)"] <= memory)
146
  ]
147
  selected_filtered_llm_perf_df = select_columns_fn(
148
- machine, columns, search, filtered_llm_perf_df
149
  )
150
  selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
151
  # filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
@@ -172,6 +169,7 @@ def create_control_callback(
172
  filter_button,
173
  # fixed
174
  machine_textbox,
 
175
  # inputs
176
  score_slider,
177
  memory_slider,
@@ -198,6 +196,7 @@ def create_control_callback(
198
  inputs=[
199
  # fixed
200
  machine_textbox,
 
201
  # inputs
202
  score_slider,
203
  memory_slider,
@@ -223,9 +222,9 @@ def create_control_callback(
223
  )
224
 
225
 
226
- def select_columns_fn(machine, columns, search, llm_perf_df=None):
227
  if llm_perf_df is None:
228
- llm_perf_df = get_llm_perf_df(machine=machine)
229
 
230
  selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
231
  selected_leaderboard_df = selected_leaderboard_df[
@@ -239,6 +238,7 @@ def select_columns_fn(machine, columns, search, llm_perf_df=None):
239
  def create_select_callback(
240
  # fixed
241
  machine_textbox,
 
242
  # interactive
243
  columns_checkboxes,
244
  search_bar,
@@ -247,11 +247,11 @@ def create_select_callback(
247
  ):
248
  columns_checkboxes.change(
249
  fn=select_columns_fn,
250
- inputs=[machine_textbox, columns_checkboxes, search_bar],
251
  outputs=[leaderboard_table],
252
  )
253
  search_bar.change(
254
  fn=select_columns_fn,
255
- inputs=[machine_textbox, columns_checkboxes, search_bar],
256
  outputs=[leaderboard_table],
257
  )
 
1
+ from typing import List
2
+
3
  import gradio as gr
4
 
5
  from src.leaderboard import get_leaderboard_df
 
10
  from src.map import get_lat_score_mem_fig
11
 
12
 
13
+ def create_control_panel(machine: str, subsets: List[str], hardware_provider: str):
14
  # controls
15
  machine_textbox = gr.Textbox(value=machine, visible=False)
16
+ subsets_values = gr.State(value=subsets)
17
+
18
+
19
+ if hardware_provider == "nvidia":
20
+ backends = ["pytorch"]
21
+ attention_implementations = ["Eager", "SDPA", "FAv2"]
22
+ quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
23
+ kernels = ["No Kernel", "GPTQ.ExllamaV1", "GPTQ.ExllamaV2", "AWQ.GEMM", "AWQ.GEMV"]
24
+ elif hardware_provider == "intel":
25
+ backends = ["pytorch", "onnxruntime", "openvino"]
26
+ attention_implementations = ["Eager"]
27
+ quantizations = ["Unquantized"]
28
+ kernels = ["No Kernel"]
29
+ else:
30
+ raise ValueError(f"Unknown hardware provider: {hardware_provider}")
31
+
32
+
33
  with gr.Accordion("Control Panel 🎛️", open=False, elem_id="control-panel"):
34
  with gr.Row():
35
  with gr.Column(scale=2, variant="panel"):
 
51
  with gr.Column(scale=1, variant="panel"):
52
  backend_checkboxes = gr.CheckboxGroup(
53
  label="Backends 🏭",
54
+ choices=backends,
55
+ value=backends,
56
  info="☑️ Select the backends",
57
  elem_id="backend-checkboxes",
58
  )
 
68
  with gr.Column(scale=1, variant="panel"):
69
  optimization_checkboxes = gr.CheckboxGroup(
70
  label="Attentions 👁️",
71
+ choices=attention_implementations,
72
+ value=attention_implementations,
73
  info="☑️ Select the optimization",
74
  elem_id="optimization-checkboxes",
75
  )
 
77
  with gr.Column(scale=1, variant="panel"):
78
  quantization_checkboxes = gr.CheckboxGroup(
79
  label="Quantizations 🗜️",
80
+ choices=quantizations,
81
+ value=quantizations,
 
 
 
 
 
 
 
 
 
 
 
 
82
  info="☑️ Select the quantization schemes",
83
  elem_id="quantization-checkboxes",
84
  elem_classes="boxed-option",
 
86
  with gr.Column(scale=1, variant="panel"):
87
  kernels_checkboxes = gr.CheckboxGroup(
88
  label="Kernels ⚛️",
89
+ choices=kernels,
90
+ value=kernels,
 
 
 
 
 
 
 
 
 
 
 
 
91
  info="☑️ Select the custom kernels",
92
  elem_id="kernel-checkboxes",
93
  elem_classes="boxed-option",
 
102
  return (
103
  filter_button,
104
  machine_textbox,
105
+ subsets_values,
106
  score_slider,
107
  memory_slider,
108
  backend_checkboxes,
 
115
 
116
  def filter_rows_fn(
117
  machine,
118
+ subsets,
119
  # inputs
120
  score,
121
  memory,
 
128
  columns,
129
  search,
130
  ):
131
+ llm_perf_df = get_llm_perf_df(machine=machine, subsets=subsets)
132
  # print(attentions)
133
  # print(llm_perf_df["Attention 👁️"].unique())
134
  filtered_llm_perf_df = llm_perf_df[
 
142
  & (llm_perf_df["Memory (MB)"] <= memory)
143
  ]
144
  selected_filtered_llm_perf_df = select_columns_fn(
145
+ machine, subsets, columns, search, filtered_llm_perf_df
146
  )
147
  selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
148
  # filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
 
169
  filter_button,
170
  # fixed
171
  machine_textbox,
172
+ subsets_textbox,
173
  # inputs
174
  score_slider,
175
  memory_slider,
 
196
  inputs=[
197
  # fixed
198
  machine_textbox,
199
+ subsets_textbox,
200
  # inputs
201
  score_slider,
202
  memory_slider,
 
222
  )
223
 
224
 
225
+ def select_columns_fn(machine, subsets, columns, search, llm_perf_df=None):
226
  if llm_perf_df is None:
227
+ llm_perf_df = get_llm_perf_df(machine=machine, subsets=subsets)
228
 
229
  selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
230
  selected_leaderboard_df = selected_leaderboard_df[
 
238
  def create_select_callback(
239
  # fixed
240
  machine_textbox,
241
+ subsets_values,
242
  # interactive
243
  columns_checkboxes,
244
  search_bar,
 
247
  ):
248
  columns_checkboxes.change(
249
  fn=select_columns_fn,
250
+ inputs=[machine_textbox, subsets_values, columns_checkboxes, search_bar],
251
  outputs=[leaderboard_table],
252
  )
253
  search_bar.change(
254
  fn=select_columns_fn,
255
+ inputs=[machine_textbox, subsets_values, columns_checkboxes, search_bar],
256
  outputs=[leaderboard_table],
257
  )