Spaces:

optimum
/

llm-perf-leaderboard

Running

App Files Files Community

IlyasMoutawwakil HF staff committed on Apr 16

Commit

a8a6326

•

1 Parent(s): 3a67001

update

Browse files

Files changed (7) hide show

README.md +1 -1
app.py +13 -7
src/assets.py +31 -5
src/content.py +28 -79
src/control_panel.py +108 -81
src/leaderboard.py +20 -13
src/llm_perf.py +13 -8

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🏆🏋️
 colorFrom: green
 colorTo: indigo
 sdk: gradio
-sdk_version: 4.9.0
 app_file: app.py
 pinned: true
 license: apache-2.0

 colorFrom: green
 colorTo: indigo
 sdk: gradio
+sdk_version: 4.26.0
 app_file: app.py
 pinned: true
 license: apache-2.0

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import gradio as gr
-from src.control_panel import create_control_panel, create_control_callback
 from src.latency_score_memory import create_lat_score_mem_plot
 from src.quantization_kernels import create_quant_plots
 from src.leaderboard import create_leaderboard_table
@@ -14,8 +14,6 @@ from src.content import (
     LOGO,
     TITLE,
     ABOUT,
-    INTRODUCTION,
-    EXAMPLE_CONFIG,
     CITATION_BUTTON,
     CITATION_BUTTON_LABEL,
 )
@@ -29,7 +27,6 @@ demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(LOGO, elem_classes="logo")
     gr.HTML(TITLE, elem_classes="title")
-    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")
     ####################### HARDWARE TABS #######################
     with gr.Tabs(elem_classes="tabs"):
         for id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
@@ -51,7 +48,7 @@ with demo:
                     llm_perf_df = get_llm_perf_df(machine=machine)
                     ####################### LEADERBOARD TAB #######################
                     with gr.TabItem("Leaderboard 🏅", id=0):
-                        leaderboard_table = create_leaderboard_table(llm_perf_df)
                         lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)
                     ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                     with gr.TabItem("BetterTransformer 📈", id=2):
@@ -73,6 +70,7 @@ with demo:
                     datatype_checkboxes,
                     optimization_checkboxes,
                     quantization_checkboxes,
                     # outputs
                     leaderboard_table,
                     lat_score_mem_plot,
@@ -83,10 +81,18 @@ with demo:
                     quant_prefill_plot,
                     quant_decode_plot,
                 )
         ####################### ABOUT TAB #######################
         with gr.TabItem("About 📖", id=3):
-            gr.HTML(ABOUT, elem_classes="descriptive-text")
-            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")
     ####################### CITATION
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):

 import gradio as gr
+from src.control_panel import create_control_panel, create_control_callback, create_select_callback
 from src.latency_score_memory import create_lat_score_mem_plot
 from src.quantization_kernels import create_quant_plots
 from src.leaderboard import create_leaderboard_table
     LOGO,
     TITLE,
     ABOUT,
     CITATION_BUTTON,
     CITATION_BUTTON_LABEL,
 )
 with demo:
     gr.HTML(LOGO, elem_classes="logo")
     gr.HTML(TITLE, elem_classes="title")
     ####################### HARDWARE TABS #######################
     with gr.Tabs(elem_classes="tabs"):
         for id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
                     llm_perf_df = get_llm_perf_df(machine=machine)
                     ####################### LEADERBOARD TAB #######################
                     with gr.TabItem("Leaderboard 🏅", id=0):
+                        leaderboard_table, columns_checkboxes = create_leaderboard_table(llm_perf_df)
                         lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)
                     ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                     with gr.TabItem("BetterTransformer 📈", id=2):
                     datatype_checkboxes,
                     optimization_checkboxes,
                     quantization_checkboxes,
+                    columns_checkboxes,
                     # outputs
                     leaderboard_table,
                     lat_score_mem_plot,
                     quant_prefill_plot,
                     quant_decode_plot,
                 )
+                create_select_callback(
+                    # inputs
+                    machine_textbox,
+                    columns_checkboxes,
+                    # outputs
+                    leaderboard_table,
+                )
         ####################### ABOUT TAB #######################
         with gr.TabItem("About 📖", id=3):
+            gr.Markdown(ABOUT, elem_classes="descriptive-text")
     ####################### CITATION
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):

src/assets.py CHANGED Viewed

@@ -17,16 +17,42 @@ custom_css = """
     font-size: 20px;
 }
-#citation-button span {
     font-size: 16px !important;
 }
-#citation-button textarea {
     font-size: 16px !important;
 }
-#citation-button > label > button {
-    margin: 6px;
-    transform: scale(1.3);
 }
 """

     font-size: 20px;
 }
+.descriptive-text span {
     font-size: 16px !important;
 }
+#control-panel span {
+    font-size: 20px !important;
+}
+#search-bar span {
+    font-size: 16px !important;
+}
+#threshold-slider span {
+    font-size: 16px !important;
+}
+#memory-slider span {
+    font-size: 16px !important;
+}
+#columns-checkboxes span {
+    font-size: 16px !important;
+}
+#backend-checkboxes span {
+    font-size: 16px !important;
+}
+#dtype-checkboxes span {
+    font-size: 16px !important;
+}
+#optimization-checkboxes span {
+    font-size: 16px !important;
+}
+#quantization-checkboxes span {
     font-size: 16px !important;
 }
+#leaderboard-table td:first-child,
+#leaderboard-table th:first-child {
+    max-width: 300px;
+    overflow: auto;
+    white-space: nowrap;
 }
 """

src/content.py CHANGED Viewed

@@ -2,85 +2,34 @@ LOGO = '<img src="https://raw.githubusercontent.com/huggingface/optimum-benchmar
 TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
-INTRODUCTION = """
-The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput, memory & energy) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
-Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
-- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) automatically.
-- Hardware/Backend/Optimization performance requests should be made in the [llm-perf-backend repository](https://github.com/IlyasMoutawwakil/llm-perf-backend) and will be added to the [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) automatically.
-"""
-ABOUT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
-<ul>
-    <li>To avoid communication-dependent results, only one GPU is used.</li>
-    <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
-    <li>LLMs are running on a singleton batch with a prompt size of 256 and generating a 256 tokens.</li>
-    <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
-    <li>We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two being reported by PyTorch and the last one being observed using PyNVML.</li>
-</ul>
-"""
-EXAMPLE_CONFIG = """
-Here's an example of the configuration file used to benchmark the models with Optimum-Benchmark:
-```yaml
-defaults:
-  - backend: pytorch
-  - _base_ # inheriting from base config
-  - _self_ # for hydra 1.1 compatibility
-experiment_name: pytorch+cuda+float16+gptq-4bit+exllama-v1
-device: cuda
-backend:
-  no_weights: true
-  torch_dtype: float16
-  quantization_scheme: gptq
-  quantization_config:
-    bits: 4
-    use_cuda_fp16: false
-    use_exllama: true
-    exllama_config:
-      version: 1
-```
-Where the base config is:
-```yaml
-defaults:
-  - benchmark: inference # default benchmark
-  - launcher: process # isolated process launcher
-  - experiment # inheriting from experiment config
-  - _self_ # for hydra 1.1 compatibility
-  - override hydra/job_logging: colorlog # colorful logging
-  - override hydra/hydra_logging: colorlog # colorful logging
-hydra:
-  run:
-    dir: dataset/${oc.env:HOSTNAME}/${experiment_name}/${model}
-  job:
-    chdir: true
-    env_set:
-      COUNTRY_ISO_CODE: FRA
-      OVERRIDE_BENCHMARKS: 0
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-backend:
-  continuous_isolation: true
-benchmark:
-  duration: 10
-  memory: true
-  energy: true
-  input_shapes:
-    batch_size: 1
-    sequence_length: 256
-  new_tokens: 256
-hub_kwargs:
-  trust_remote_code: true
-```
 """

 TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
+ABOUT = """
+## 📝 About
+The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
+Its aim is to benchmark the performance (latency, throughput, memory & energy)
+of Large Language Models (LLMs) with different hardware, backends and optimizations
+using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
+Anyone from the community can request a new base model or hardware/backend/optimization
+configuration for automated benchmarking:
+- Model evaluation requests should be made in the
+[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
+we scrape the list of pretrained base models from there.
+- Hardware/Backend/Optimization configuration requests should be made in the
+[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
+[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
+## ✍️ Details
+- To avoid communication-dependent results, only one GPU is used.
+- Score is the average evaluation score obtained from the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+- LLMs are running on a singleton batch with a prompt size of 256 and generating 256 tokens.
+- Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.
+- We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two being reported by PyTorch and the last one being observed using PyNVML.
+All of our benchmarks are run by this single script
+[benchmark_cuda_pytorch.py](https://github.com/huggingface/optimum-benchmark/blob/llm-perf/llm-perf/benchmark_cuda_pytorch.py)
+using the power of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) to guarantee reproducibility and consistency.
 """

src/control_panel.py CHANGED Viewed

@@ -9,90 +9,91 @@ from src.quantization_kernels import get_quant_prefill_fig, get_quant_decode_fig
 def create_control_panel(machine: str = "hf-dgx-01"):
-    # descriptive text
-    gr.HTML("Use this control panel to filter the leaderboard.", elem_id="text")
     # controls
     machine_textbox = gr.Textbox(value=machine, visible=False)
-    with gr.Row():
-        with gr.Column():
-            search_bar = gr.Textbox(
-                label="Model 🤗",
-                info="🔍 Search for a model name",
-                elem_id="search-bar",
             )
-    with gr.Row():
-        with gr.Column(scale=1, variant="panel"):
-            score_slider = gr.Slider(
-                label="Open LLM Score (%) 📈",
-                info="🎚️ Slide to minimum Open LLM score",
-                value=0,
-                elem_id="threshold-slider",
-            )
-        with gr.Column(scale=1, variant="panel"):
-            memory_slider = gr.Slider(
-                label="Peak Memory (MB) 📈",
-                info="🎚️ Slide to maximum Peak Memory",
-                minimum=0,
-                maximum=80 * 1024,
-                value=80 * 1024,
-                elem_id="memory-slider",
-            )
-        with gr.Column(scale=1):
-            backend_checkboxes = gr.CheckboxGroup(
-                label="Backends 🏭",
-                choices=["pytorch"],
-                value=["pytorch"],
-                info="☑️ Select the backends",
-                elem_id="backend-checkboxes",
-            )
-    with gr.Row():
-        with gr.Column(scale=1, variant="panel"):
-            datatype_checkboxes = gr.CheckboxGroup(
-                label="Load DTypes 📥",
-                choices=["float32", "float16", "bfloat16"],
-                value=["float32", "float16", "bfloat16"],
-                info="☑️ Select the load data types",
-                elem_id="dtype-checkboxes",
-            )
-        with gr.Column(scale=1, variant="panel"):
-            optimization_checkboxes = gr.CheckboxGroup(
-                label="Optimizations 🛠️",
-                choices=["None", "BetterTransformer", "FlashAttentionV2"],
-                value=["None", "BetterTransformer", "FlashAttentionV2"],
-                info="☑️ Select the optimization",
-                elem_id="optimization-checkboxes",
-            )
-        with gr.Column(scale=2):
-            quantization_checkboxes = gr.CheckboxGroup(
-                label="Quantizations 🗜️",
-                choices=[
-                    "None",
-                    "BnB.4bit",
-                    "BnB.8bit",
-                    "GPTQ.4bit",
-                    "GPTQ.4bit+ExllamaV1",
-                    "GPTQ.4bit+ExllamaV2",
-                    "AWQ.4bit+GEMM",
-                    "AWQ.4bit+GEMV",
-                ],
-                value=[
-                    "None",
-                    "BnB.4bit",
-                    "BnB.8bit",
-                    "GPTQ.4bit",
-                    "GPTQ.4bit+ExllamaV1",
-                    "GPTQ.4bit+ExllamaV2",
-                    "AWQ.4bit+GEMM",
-                    "AWQ.4bit+GEMV",
-                ],
-                info="☑️ Select the quantization schemes",
-                elem_id="quantization-checkboxes",
-            )
-    with gr.Row():
-        filter_button = gr.Button(
-            value="Filter 🚀",
-            elem_id="filter-button",
-        )
     return (
         filter_button,
@@ -114,6 +115,7 @@ def filter_fn(
     datatypes,
     optimizations,
     quantizations,
     score,
     memory,
 ):
@@ -128,6 +130,7 @@ def filter_fn(
         & (raw_df["Allocated Memory (MB)"] <= memory)
     ]
     filtered_leaderboard_df = get_leaderboard_df(filtered_df)
     filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_df)
     filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
     filtered_bt_decode_fig = get_bt_decode_fig(filtered_df)
@@ -160,6 +163,7 @@ def create_control_callback(
     datatype_checkboxes,
     optimization_checkboxes,
     quantization_checkboxes,
     # outputs
     leaderboard_table,
     lat_score_mem_plot,
@@ -179,6 +183,7 @@ def create_control_callback(
             datatype_checkboxes,
             optimization_checkboxes,
             quantization_checkboxes,
             score_slider,
             memory_slider,
         ],
@@ -193,3 +198,25 @@ def create_control_callback(
             quant_decode_plot,
         ],
     )

 def create_control_panel(machine: str = "hf-dgx-01"):
     # controls
     machine_textbox = gr.Textbox(value=machine, visible=False)
+    with gr.Accordion("Control Panel 🎛️", open=False, elem_id="control-panel"):
+        with gr.Row():
+            with gr.Column():
+                search_bar = gr.Textbox(
+                    label="Model 🤗",
+                    info="🔍 Search for a model name",
+                    elem_id="search-bar",
+                )
+        with gr.Row():
+            with gr.Column(scale=1, variant="panel"):
+                score_slider = gr.Slider(
+                    label="Open LLM Score (%) 📈",
+                    info="🎚️ Slide to minimum Open LLM score",
+                    value=0,
+                    elem_id="threshold-slider",
+                )
+            with gr.Column(scale=1, variant="panel"):
+                memory_slider = gr.Slider(
+                    label="Peak Memory (MB) 📈",
+                    info="🎚️ Slide to maximum Peak Memory",
+                    minimum=0,
+                    maximum=80 * 1024,
+                    value=80 * 1024,
+                    elem_id="memory-slider",
+                )
+            with gr.Column(scale=1):
+                backend_checkboxes = gr.CheckboxGroup(
+                    label="Backends 🏭",
+                    choices=["pytorch"],
+                    value=["pytorch"],
+                    info="☑️ Select the backends",
+                    elem_id="backend-checkboxes",
+                )
+        with gr.Row():
+            with gr.Column(scale=1, variant="panel"):
+                datatype_checkboxes = gr.CheckboxGroup(
+                    label="Load DTypes 📥",
+                    choices=["float32", "float16", "bfloat16"],
+                    value=["float32", "float16", "bfloat16"],
+                    info="☑️ Select the load data types",
+                    elem_id="dtype-checkboxes",
+                )
+            with gr.Column(scale=1, variant="panel"):
+                optimization_checkboxes = gr.CheckboxGroup(
+                    label="Optimizations 🛠️",
+                    choices=["None", "BetterTransformer", "FlashAttentionV2"],
+                    value=["None", "BetterTransformer", "FlashAttentionV2"],
+                    info="☑️ Select the optimization",
+                    elem_id="optimization-checkboxes",
+                )
+            with gr.Column(scale=2):
+                quantization_checkboxes = gr.CheckboxGroup(
+                    label="Quantizations 🗜️",
+                    choices=[
+                        "None",
+                        "BnB.4bit",
+                        "BnB.8bit",
+                        "GPTQ.4bit",
+                        "GPTQ.4bit+ExllamaV1",
+                        "GPTQ.4bit+ExllamaV2",
+                        "AWQ.4bit+GEMM",
+                        "AWQ.4bit+GEMV",
+                    ],
+                    value=[
+                        "None",
+                        "BnB.4bit",
+                        "BnB.8bit",
+                        "GPTQ.4bit",
+                        "GPTQ.4bit+ExllamaV1",
+                        "GPTQ.4bit+ExllamaV2",
+                        "AWQ.4bit+GEMM",
+                        "AWQ.4bit+GEMV",
+                    ],
+                    info="☑️ Select the quantization schemes",
+                    elem_id="quantization-checkboxes",
+                    elem_classes="boxed-option",
+                )
+        with gr.Row():
+            filter_button = gr.Button(
+                value="Filter 🚀",
+                elem_id="filter-button",
+                elem_classes="boxed-option",
             )
     return (
         filter_button,
     datatypes,
     optimizations,
     quantizations,
+    columns,
     score,
     memory,
 ):
         & (raw_df["Allocated Memory (MB)"] <= memory)
     ]
     filtered_leaderboard_df = get_leaderboard_df(filtered_df)
+    filtered_leaderboard_df = filtered_leaderboard_df[columns]
     filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_df)
     filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
     filtered_bt_decode_fig = get_bt_decode_fig(filtered_df)
     datatype_checkboxes,
     optimization_checkboxes,
     quantization_checkboxes,
+    columns_checkboxes,
     # outputs
     leaderboard_table,
     lat_score_mem_plot,
             datatype_checkboxes,
             optimization_checkboxes,
             quantization_checkboxes,
+            columns_checkboxes,
             score_slider,
             memory_slider,
         ],
             quant_decode_plot,
         ],
     )
+def select_fn(machine, columns):
+    raw_df = get_llm_perf_df(machine=machine)
+    selected_leaderboard_df = get_leaderboard_df(raw_df)
+    selected_leaderboard_df = selected_leaderboard_df[columns]
+    return selected_leaderboard_df
+def create_select_callback(
+    # inputs
+    machine_textbox,
+    columns_checkboxes,
+    # outputs
+    leaderboard_table,
+):
+    columns_checkboxes.change(
+        fn=select_fn,
+        inputs=[machine_textbox, columns_checkboxes],
+        outputs=[leaderboard_table],
+    )

src/leaderboard.py CHANGED Viewed

@@ -5,21 +5,22 @@ from src.utils import model_hyperlink, process_score
 LEADERBOARD_COLUMN_TO_DATATYPE = {
     # open llm
-    "Model 🤗" :"markdown",
-    "Arch 🏛️" :"markdown",
-    "Params (B)": "number",
-    "Open LLM Score (%)": "number",
-    # deployment settings
-    "DType 📥" :"str",
-    "Backend 🏭" :"str",
-    "Optimization 🛠️" :"str",
-    "Quantization 🗜️" :"str",
     # primary measurements
     "Prefill Latency (s)": "number",
     "Decode Throughput (tokens/s)": "number",
     "Allocated Memory (MB)": "number",
     "Energy (tokens/kWh)": "number",
     # additional measurements
     "E2E Latency (s)": "number",
     "E2E Throughput (tokens/s)": "number",
     "Reserved Memory (MB)": "number",
@@ -45,16 +46,22 @@ def get_leaderboard_df(llm_perf_df):
 def create_leaderboard_table(llm_perf_df):
-    # descriptive text
-    gr.HTML("👉 Scroll to the right 👉 for additional columns.", elem_id="text")
     # get dataframe
     leaderboard_df = get_leaderboard_df(llm_perf_df)
     # create table
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard_df,
         datatype=list(LEADERBOARD_COLUMN_TO_DATATYPE.values()),
         headers=list(LEADERBOARD_COLUMN_TO_DATATYPE.keys()),
-        elem_id="table",
     )
-    return leaderboard_table

 LEADERBOARD_COLUMN_TO_DATATYPE = {
     # open llm
+    "Model 🤗": "markdown",
+    "Experiment 🧪": "str",
     # primary measurements
     "Prefill Latency (s)": "number",
     "Decode Throughput (tokens/s)": "number",
     "Allocated Memory (MB)": "number",
     "Energy (tokens/kWh)": "number",
+    # deployment settings
+    "DType 📥": "str",
+    "Backend 🏭": "str",
+    "Optimization 🛠️": "str",
+    "Quantization 🗜️": "str",
     # additional measurements
+    "Arch 🏛️": "markdown",
+    "Params (B)": "number",
+    "Open LLM Score (%)": "number",
     "E2E Latency (s)": "number",
     "E2E Throughput (tokens/s)": "number",
     "Reserved Memory (MB)": "number",
 def create_leaderboard_table(llm_perf_df):
     # get dataframe
     leaderboard_df = get_leaderboard_df(llm_perf_df)
+    # create checkboxes
+    columns_checkboxes = gr.CheckboxGroup(
+        label="Columns 📊",
+        choices=list(LEADERBOARD_COLUMN_TO_DATATYPE.keys()),
+        value=list(LEADERBOARD_COLUMN_TO_DATATYPE.keys()),
+        info="☑️ Select the columns to display",
+        elem_id="columns-checkboxes",
+    )
     # create table
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard_df,
         datatype=list(LEADERBOARD_COLUMN_TO_DATATYPE.values()),
         headers=list(LEADERBOARD_COLUMN_TO_DATATYPE.keys()),
+        elem_id="leaderboard-table",
     )
+    return leaderboard_table, columns_checkboxes

src/llm_perf.py CHANGED Viewed

@@ -10,8 +10,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 COLUMNS_MAPPING = {
     "Model": "Model 🤗",
-    "Arch": "Arch 🏛️",
-    "Size": "Params (B)",
     # primary measurements
     "forward.latency(s)": "Prefill Latency (s)",
     "decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
@@ -23,11 +22,13 @@ COLUMNS_MAPPING = {
     "optimization": "Optimization 🛠️",
     "quantization": "Quantization 🗜️",
     # additional measurements
     "Score": "Open LLM Score (%)",
     "generate.latency(s)": "E2E Latency (s)",
     "generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
-    # "generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
-    # "generate.max_memory_used(MB)": "Used Memory (MB)",
 }
 SORTING_COLUMNS = [
     "Open LLM Score (%)",
@@ -46,7 +47,7 @@ def get_llm_df():
         repo_type="dataset",
         token=HF_TOKEN,
     )
     llm_df = pd.read_csv("dataset/open-llm.csv")
     return llm_df
@@ -86,9 +87,11 @@ def get_llm_perf_df(machine: str = "hf-dgx-01"):
     # add optimization column
     llm_perf_df["optimization"] = llm_perf_df[["backend.to_bettertransformer", "backend.use_flash_attention_2"]].apply(
-        lambda x: "BetterTransformer"
-        if x["backend.to_bettertransformer"]
-        else ("FlashAttentionV2" if x["backend.use_flash_attention_2"] else "None"),
         axis=1,
     )
     # add quantization scheme
@@ -102,6 +105,8 @@ def get_llm_perf_df(machine: str = "hf-dgx-01"):
             "backend.quantization_config.exllama_config.version",
         ]
     ].apply(lambda x: process_quantization_scheme(x), axis=1)
     # add arch
     llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
     # filter columns

 COLUMNS_MAPPING = {
     "Model": "Model 🤗",
+    "experiment_name": "Experiment 🧪",
     # primary measurements
     "forward.latency(s)": "Prefill Latency (s)",
     "decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
     "optimization": "Optimization 🛠️",
     "quantization": "Quantization 🗜️",
     # additional measurements
+    "Arch": "Arch 🏛️",
+    "Size": "Params (B)",
     "Score": "Open LLM Score (%)",
     "generate.latency(s)": "E2E Latency (s)",
     "generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
+    "generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
+    "generate.max_memory_used(MB)": "Used Memory (MB)",
 }
 SORTING_COLUMNS = [
     "Open LLM Score (%)",
         repo_type="dataset",
         token=HF_TOKEN,
     )
     llm_df = pd.read_csv("dataset/open-llm.csv")
     return llm_df
     # add optimization column
     llm_perf_df["optimization"] = llm_perf_df[["backend.to_bettertransformer", "backend.use_flash_attention_2"]].apply(
+        lambda x: (
+            "BetterTransformer"
+            if x["backend.to_bettertransformer"]
+            else ("FlashAttentionV2" if x["backend.use_flash_attention_2"] else "None")
+        ),
         axis=1,
     )
     # add quantization scheme
             "backend.quantization_config.exllama_config.version",
         ]
     ].apply(lambda x: process_quantization_scheme(x), axis=1)
+    # process experiment name
+    llm_perf_df["experiment_name"] = llm_perf_df["experiment_name"].apply(lambda x: x.replace("pytorch+cuda+", ""))
     # add arch
     llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
     # filter columns