"""
This module provides functionality for displaying and analyzing model benchmark results.
It includes functions for data processing, sorting, and a Gradio interface for user interaction.
"""
import logging
import re
import gradio as gr
import pandas as pd
from results import instance_type_mappings, results
logging.basicConfig(level=logging.DEBUG)
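
# Expected shape of the imported data (inferred from the lookups below, not a formal schema):
#   results["models"]        -> list of dicts with "name", "modelType" and "configurations"
#   configuration entries    -> "instanceType", an optional nested "configurations" list, plus
#                               "status", "quantization", "container" (or legacy "tgi"),
#                               "tokensPerSecond" and "notes"
#   instance_type_mappings   -> dict keyed by instance type with "cloud", "gpu", "gpuRAM", "url"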


def get_model_names():
    """
    Retrieve a sorted list of model names from the results data.

    Returns:
        list: Sorted list of model names.
    """
    return sorted([model["name"] for model in results["models"]])


def get_models_by_architecture(model_name):
    """
    Retrieve models with the same architecture as the specified model.

    Args:
        model_name (str): Name of the model to match architecture.

    Returns:
        list: List of models with the same architecture.
    """
    selected_model = next(
        (m for m in results["models"] if m["name"] == model_name), None
    )
    if not selected_model:
        return []
    model_type = selected_model.get("modelType", "")
    return [m for m in results["models"] if m.get("modelType", "") == model_type]
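
# Note: grouping is by "modelType", so display_results() below merges rows from all
# checkpoints that share an architecture. Hypothetical example: two entries with
# modelType "llama" would both be returned when either one is selected.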


def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
    """
    size_order = [
        "xlarge",
        "2xlarge",
        "4xlarge",
        "8xlarge",
        "12xlarge",
        "16xlarge",
        "24xlarge",
        "48xlarge",
    ]
    # Split into family and size, e.g. "g5.2xlarge" -> ("g5", "2xlarge");
    # the trailing [a-z]* also accepts families such as "g4dn" or "p4d".
    match = re.match(r"([a-z]+\d+[a-z]*)\.(\w+)", instance_type)
    if match:
        family, size = match.groups()
        return (
            family,
            size_order.index(size) if size in size_order else len(size_order),
        )
    return (instance_type, 0)  # Fallback for non-standard instance types
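
# A quick illustration (instance names here are examples, not taken from the benchmark data):
#   sorted(["g5.12xlarge", "p4d.24xlarge", "g5.xlarge"], key=custom_sort_key)
#   -> ["g5.xlarge", "g5.12xlarge", "p4d.24xlarge"]
# i.e. instances are grouped by family, then ordered by size within each family.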


def display_results(model_name):
    """
    Process and display results for a given model.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information.
            - pandas.DataFrame: Styled DataFrame with the results.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )
        model_type = models[0].get("modelType", "N/A")
        data = []
        merged_models = set()
        for model in models:
            merged_models.add(model.get("name", "Unknown"))
            for config in model.get("configurations", []):
                try:
                    instance_type = config.get("instanceType", "N/A")
                    # Fetch cloud, GPU, GPU RAM, and URL information from instance_type_mappings
                    instance_info = instance_type_mappings.get(instance_type, {})
                    cloud = instance_info.get("cloud", "N/A")
                    gpu = instance_info.get("gpu", "N/A")
                    gpu_ram = instance_info.get("gpuRAM", "N/A")
                    # url = instance_info.get("url", "")
                    if "configurations" in config:
                        for nested_config in config["configurations"]:
                            data.append(
                                {
                                    "Cloud": cloud,
                                    "Instance Type": instance_type,
                                    "GPU": gpu,
                                    "GPU RAM": gpu_ram,
                                    "Status": nested_config.get("status", "N/A"),
                                    "Quantization": nested_config.get(
                                        "quantization", "N/A"
                                    ),
                                    "Container": nested_config.get(
                                        "container",
                                        nested_config.get("tgi", "N/A"),
                                    ),
                                    "Tokens per Second": nested_config.get(
                                        "tokensPerSecond", "N/A"
                                    ),
                                    "Notes": nested_config.get("notes", ""),
                                }
                            )
                    else:
                        data.append(
                            {
                                "Cloud": cloud,
                                "Instance Type": instance_type,
                                "GPU": gpu,
                                "GPU RAM": gpu_ram,
                                "Status": config.get("status", "N/A"),
                                "Quantization": config.get("quantization", "N/A"),
                                "Container": config.get(
                                    "container", config.get("tgi", "N/A")
                                ),
                                "Tokens per Second": config.get(
                                    "tokensPerSecond", "N/A"
                                ),
                                "Notes": config.get("notes", ""),
                            }
                        )
                except (KeyError, ValueError, TypeError) as e:
                    logging.error("Error processing configuration: %s", e)
                    continue
        if not data:
            logging.warning("No data extracted for %s", model_name)
            return (
                f"No data for the selected model: {model_name}",
                pd.DataFrame(),
            )
        merged_models_message = (
            f"Note: Results merged from models: {', '.join(merged_models)}"
            if len(merged_models) > 1
            else None
        )
        sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))
        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if merged_models_message:
            result_text += f"\n\n{merged_models_message}"
        df = pd.DataFrame(sorted_data)

        def color_status(val):
            # Color the Status column: green for passing ("OK"), red for failing ("KO")
            if val == "OK":
                return "background-color: green; color: white"
            if val == "KO":
                return "background-color: red; color: white"
            return ""

        # Styler.applymap was renamed to Styler.map in pandas 2.1; applymap still works
        # but emits a deprecation warning on newer pandas versions.
        styled_df = df.style.applymap(color_status, subset=["Status"])
        return result_text, styled_df
    except (KeyError, ValueError, TypeError) as e:
        logging.exception("Error in display_results: %s", e)
        return (
            f"An error occurred for {model_name}: {str(e)}",
            pd.DataFrame(),
        )


with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
        Configurations are default unless noted.
        [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")
    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()
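
# The app can also be exposed beyond localhost (e.g. when running in a container)
# using standard Gradio launch options, for instance:
#     demo.launch(server_name="0.0.0.0", server_port=7860)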