Benchmarks / app.py
Julien Simon
Add cost-performance index (CPI)
8383fbb
raw
history blame
7.77 kB
"""
This module provides functionality for displaying and analyzing model benchmark results.
It includes functions for data processing, sorting, and a Gradio interface for user interaction.
"""
import logging
import re
import gradio as gr
import pandas as pd
from results import instance_type_mappings, results
# Configure the root logger so that messages from DEBUG level upwards are emitted.
logging.basicConfig(level=logging.DEBUG)
def get_model_names():
    """
    List the names of all models present in the results data.

    Returns:
        list: Model names from ``results["models"]`` in ascending order.
    """
    names = [entry["name"] for entry in results["models"]]
    names.sort()
    return names
def get_models_by_architecture(model_name):
    """
    Collect every model whose ``modelType`` matches that of the named model.

    Args:
        model_name (str): Name of the reference model.

    Returns:
        list: Models sharing the reference model's ``modelType`` (a missing
        ``modelType`` is treated as the empty string). Empty list when no
        model has the given name.
    """
    all_models = results["models"]

    # Locate the first model carrying the requested name.
    reference = None
    for candidate in all_models:
        if candidate["name"] == model_name:
            reference = candidate
            break

    if not reference:
        return []

    wanted_type = reference.get("modelType", "")
    return [
        candidate
        for candidate in all_models
        if candidate.get("modelType", "") == wanted_type
    ]
def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    The instance type is split into a family (e.g. ``g5``, ``p4d``, ``g6e``)
    and a size (e.g. ``2xlarge``) so that sizes within one family sort by
    capacity rather than alphabetically.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: ``(family, size_index)`` when the name has the
        ``<family>.<size>`` shape, where ``size_index`` is the position of the
        size in the known size list (unknown sizes rank after all known ones).
        Otherwise ``(instance_type, 0)``.
    """
    size_order = (
        "xlarge",
        "2xlarge",
        "4xlarge",
        "8xlarge",
        "12xlarge",
        "16xlarge",
        "24xlarge",
        "48xlarge",
    )
    size_rank = {size: rank for rank, size in enumerate(size_order)}

    # The family is letters + generation digits, optionally followed by
    # attribute letters (p4d, g6e, g4dn) and a hyphenated variant (m7i-flex).
    # Without the optional parts such families fall through to the fallback
    # and their sizes sort lexicographically.
    match = re.match(r"([a-z]+\d+[a-z]*(?:-[a-z]+)?)\.(\w+)", instance_type)
    if match:
        family, size = match.groups()
        return (family, size_rank.get(size, len(size_order)))
    return (instance_type, 0)  # Fallback for non-standard instance types
def process_model_data(models):
    """Flatten the configurations of all given models into a list of rows."""
    rows = []
    # Walk every configuration of every model, in order; models without a
    # "configurations" entry contribute nothing.
    all_configs = (
        cfg for entry in models for cfg in entry.get("configurations", [])
    )
    for cfg in all_configs:
        process_configuration(cfg, rows)
    return rows
def process_configuration(config, data):
    """Expand one instance-level configuration into rows appended to data."""
    instance_type = config.get("instanceType", "N/A")
    specs = instance_type_mappings.get(instance_type, {})
    shared_fields = dict(
        cloud=specs.get("cloud", "N/A"),
        gpu=specs.get("gpu", "N/A"),
        gpu_ram=specs.get("gpuRAM", "N/A"),
        instance_type=instance_type,
    )

    # A configuration either carries nested per-run configurations or is
    # itself the single run to record.
    if "configurations" in config:
        entries = config["configurations"]
    else:
        entries = [config]

    for entry in entries:
        append_config_data(entry, shared_fields, data)
def append_config_data(config, instance_data, data):
    """Build one result row from a configuration and append it to data."""
    # "tgi" is consulted only when the configuration has no "container" key.
    container_fallback = config.get("tgi", "N/A")

    row = {
        "Cloud": instance_data["cloud"],
        "Instance Type": instance_data["instance_type"],
        "GPU": instance_data["gpu"],
        "GPU RAM": instance_data["gpu_ram"],
    }
    row["Status"] = config.get("status", "N/A")
    row["Quantization"] = config.get("quantization", "N/A")
    row["Container"] = config.get("container", container_fallback)
    row["Tokens per Second"] = config.get("tokensPerSecond", 0)
    row["Notes"] = config.get("notes", "")

    data.append(row)
def create_and_process_dataframe(data):
    """Build the results DataFrame, add a numeric CPI column and sort by it."""
    frame = pd.DataFrame(data)

    # Compute CPI row by row, then coerce both numeric columns so that
    # non-numeric values become NaN.
    frame["CPI"] = frame.apply(calculate_cpi, axis=1)
    for numeric_column in ("CPI", "Tokens per Second"):
        frame[numeric_column] = pd.to_numeric(frame[numeric_column], errors="coerce")

    # Move CPI so it sits immediately after "Tokens per Second".
    ordered = list(frame.columns)
    cpi_slot = ordered.index("Tokens per Second") + 1
    ordered.remove("CPI")
    ordered.insert(cpi_slot, "CPI")

    reordered = frame[ordered]
    return reordered.sort_values("CPI", ascending=False, na_position="last")
def calculate_cpi(row):
    """
    Compute the cost-performance index for one result row.

    Returns tokens per second divided by the instance price when both are
    strictly positive, and ``pd.NA`` otherwise (including when the values
    cannot be interpreted as numbers).
    """
    mapping = instance_type_mappings.get(row["Instance Type"], {})
    price = mapping.get("price", 0)
    raw_throughput = row["Tokens per Second"]
    try:
        throughput = float(raw_throughput)
        usable = throughput > 0 and price > 0
        cpi = throughput / price if usable else pd.NA
    except (ValueError, TypeError):
        cpi = pd.NA
    return cpi
def style_dataframe(df):
"""Apply styling to the DataFrame."""
def color_status(val):
if val == "OK":
return "background-color: green; color: white"
if val == "KO":
return "background-color: red; color: white"
return ""
return df.style.map(color_status, subset=["Status"]).format(
{"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A"
)
def display_results(model_name):
    """
    Process and display results for a given model, including CPI calculation.

    Results of all models sharing the selected model's architecture are
    merged into a single table.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information.
            - pandas.DataFrame or Styler: Styled results including CPI, or an
              empty DataFrame when nothing can be shown.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )

        model_type = models[0].get("modelType", "N/A")
        data = process_model_data(models)
        if not data:
            logging.warning("No data extracted for %s", model_name)
            return f"No data for the selected model: {model_name}", pd.DataFrame()

        # De-duplicate while keeping the order of the results data: a set
        # iterates in an order that can change between runs, which would
        # shuffle the names in the note shown to the user.
        merged_models = list(
            dict.fromkeys(model.get("name", "Unknown") for model in models)
        )

        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if len(merged_models) > 1:
            merged_models_message = (
                f"Note: Results merged from models: {', '.join(merged_models)}"
            )
            result_text += f"\n\n{merged_models_message}"

        df = create_and_process_dataframe(data)
        styled_df = style_dataframe(df)
        return result_text, styled_df
    except (KeyError, ValueError, TypeError) as e:
        # Report the failure in the UI instead of crashing the callback.
        logging.exception("Error in display_results: %s", e)
        return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame()
# Gradio UI: a model dropdown whose change event calls display_results and
# fills the Markdown summary and the results table.
with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
Configurations are default unless noted.\n
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
[SGLang](https://github.com/sgl-project/sglang),
[Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
CPI means cost-performance index and is calculated as tokens per second / instance price."""
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")
    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()