Spaces:

arcee-ai
/

Benchmarks

Running

Benchmarks / app.py

Julien Simon

Add cost-performance index (CPI)

8383fbb 3 months ago

7.77 kB

	"""
	This module provides functionality for displaying and analyzing model benchmark results.
	It includes functions for data processing, sorting, and a Gradio interface for user interaction.
	"""

	import logging
	import re

	import gradio as gr
	import pandas as pd

	from results import instance_type_mappings, results

	# Configure the root logger once at import time; DEBUG lets every message through.
	logging.basicConfig(level=logging.DEBUG)


	def get_model_names():
	    """
	    List the name of every model present in the results data.

	    Returns:
	        list: Model names read from ``results["models"]``, in ascending order.
	    """
	    names = [entry["name"] for entry in results["models"]]
	    names.sort()
	    return names


	def get_models_by_architecture(model_name):
	    """
	    Collect all models whose ``modelType`` equals that of the named model.

	    A model without a ``modelType`` key is treated as having the type ``""``.

	    Args:
	        model_name (str): Name of the reference model.

	    Returns:
	        list: Matching model entries in their original order, or an empty
	        list when no model carries the given name.
	    """
	    # Locate the first entry with the requested name.
	    selected_model = None
	    for candidate in results["models"]:
	        if candidate["name"] == model_name:
	            selected_model = candidate
	            break

	    if not selected_model:
	        return []

	    wanted_type = selected_model.get("modelType", "")
	    same_type = []
	    for candidate in results["models"]:
	        if candidate.get("modelType", "") == wanted_type:
	            same_type.append(candidate)
	    return same_type


	# Known instance sizes, smallest first; the position in this tuple is the sort rank.
	_SIZE_ORDER = (
	    "xlarge",
	    "2xlarge",
	    "4xlarge",
	    "8xlarge",
	    "12xlarge",
	    "16xlarge",
	    "24xlarge",
	    "48xlarge",
	)
	_SIZE_RANK = {size: rank for rank, size in enumerate(_SIZE_ORDER)}

	# "<family>.<size>": the family is letters + generation digits, optionally
	# followed by a suffix (e.g. "g5", "inf2", "p4d", "g6e").
	_INSTANCE_TYPE_RE = re.compile(r"([a-z]+\d+[a-z\d-]*)\.(\w+)")


	def custom_sort_key(instance_type):
	    """
	    Generate a custom sorting key for instance types.

	    Instance types of the form ``<family>.<size>`` sort by family first and
	    then by size, using the order in ``_SIZE_ORDER`` rather than plain string
	    order (so ``2xlarge`` comes before ``12xlarge``). Families with a suffix
	    after the generation number (``p4d``, ``g6e``) are recognised as well.

	    Args:
	        instance_type (str): The instance type to generate a key for.

	    Returns:
	        tuple: ``(family, size_index)``. A size that is not in the known list
	        gets ``len(_SIZE_ORDER)`` so it sorts after all known sizes. A string
	        that does not look like ``<family>.<size>`` yields
	        ``(instance_type, 0)``.
	    """
	    match = _INSTANCE_TYPE_RE.match(instance_type)
	    if match is None:
	        return (instance_type, 0)  # Fallback for non-standard instance types

	    family, size = match.groups()
	    return (family, _SIZE_RANK.get(size, len(_SIZE_ORDER)))


	def process_model_data(models):
	    """Flatten the configurations of every model into one list of row dicts."""
	    rows = []
	    # Walk each model's "configurations" list (absent means none) in order.
	    all_configs = (
	        cfg for entry in models for cfg in entry.get("configurations", [])
	    )
	    for cfg in all_configs:
	        process_configuration(cfg, rows)
	    return rows


	def process_configuration(config, data):
	    """
	    Turn one instance-level configuration into rows appended to ``data``.

	    The instance details (cloud, GPU, GPU RAM) are looked up in
	    ``instance_type_mappings`` and default to "N/A". When ``config`` holds a
	    nested "configurations" list, one row is appended per nested entry;
	    otherwise ``config`` itself produces a single row.
	    """
	    instance_type = config.get("instanceType", "N/A")
	    mapping = instance_type_mappings.get(instance_type, {})
	    instance_data = {
	        "cloud": mapping.get("cloud", "N/A"),
	        "gpu": mapping.get("gpu", "N/A"),
	        "gpu_ram": mapping.get("gpuRAM", "N/A"),
	        "instance_type": instance_type,
	    }

	    entries = config["configurations"] if "configurations" in config else [config]
	    for entry in entries:
	        append_config_data(entry, instance_data, data)


	def append_config_data(config, instance_data, data):
	    """
	    Build one result row and append it to ``data``.

	    Instance-level fields come from ``instance_data``; run-level fields come
	    from ``config`` with defaults for anything missing. The container name
	    is read from "container", falling back to the "tgi" key.
	    """
	    hardware = {
	        "Cloud": instance_data["cloud"],
	        "Instance Type": instance_data["instance_type"],
	        "GPU": instance_data["gpu"],
	        "GPU RAM": instance_data["gpu_ram"],
	    }
	    measurements = {
	        "Status": config.get("status", "N/A"),
	        "Quantization": config.get("quantization", "N/A"),
	        "Container": config.get("container", config.get("tgi", "N/A")),
	        "Tokens per Second": config.get("tokensPerSecond", 0),
	        "Notes": config.get("notes", ""),
	    }
	    data.append({**hardware, **measurements})


	def create_and_process_dataframe(data):
	    """
	    Build the results DataFrame, add a numeric CPI column and sort by it.

	    The CPI column is placed directly after "Tokens per Second". Rows are
	    ordered by CPI from highest to lowest, with missing CPI values last.
	    """
	    frame = pd.DataFrame(data)
	    frame["CPI"] = frame.apply(calculate_cpi, axis=1)
	    # Coerce both metric columns to numbers; unparsable values become NaN.
	    for metric in ("CPI", "Tokens per Second"):
	        frame[metric] = pd.to_numeric(frame[metric], errors="coerce")

	    # Move CPI so it sits immediately to the right of "Tokens per Second".
	    ordered = list(frame.columns)
	    anchor = ordered.index("Tokens per Second")
	    ordered.remove("CPI")
	    ordered.insert(anchor + 1, "CPI")

	    reordered = frame[ordered]
	    return reordered.sort_values("CPI", ascending=False, na_position="last")


	def calculate_cpi(row):
	    """
	    Compute the cost-performance index (tokens per second / price) for a row.

	    Returns ``pd.NA`` when the throughput cannot be read as a number, or when
	    either the throughput or the instance price is not strictly positive.
	    """
	    price = instance_type_mappings.get(row["Instance Type"], {}).get("price", 0)
	    raw_throughput = row["Tokens per Second"]

	    try:
	        throughput = float(raw_throughput)
	        usable = throughput > 0 and price > 0
	        cpi = throughput / price if usable else pd.NA
	    except (ValueError, TypeError):
	        cpi = pd.NA
	    return cpi


	def style_dataframe(df):
	"""Apply styling to the DataFrame."""

	def color_status(val):
	if val == "OK":
	return "background-color: green; color: white"
	if val == "KO":
	return "background-color: red; color: white"
	return ""

	return df.style.map(color_status, subset=["Status"]).format(
	{"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A"
	)


	def display_results(model_name):
	    """
	    Process and display results for a given model, including CPI calculation.

	    Results of all models sharing the selected model's type are merged into
	    one table. When more than one model contributes, a note listing their
	    names (alphabetically, so the text is stable between runs) is added.

	    Args:
	        model_name (str): Name of the model to display results for.

	    Returns:
	        tuple: A tuple containing:
	            - str: Markdown formatted string with model information, or a
	              plain message when nothing can be shown.
	            - The styled results table including CPI, or an empty
	              ``pandas.DataFrame`` when there are no results or an error
	              occurred.
	    """
	    try:
	        models = get_models_by_architecture(model_name)
	        if not models:
	            logging.warning("No models found for %s", model_name)
	            return (
	                f"No results found for the selected model: {model_name}",
	                pd.DataFrame(),
	            )

	        model_type = models[0].get("modelType", "N/A")
	        data = process_model_data(models)

	        if not data:
	            logging.warning("No data extracted for %s", model_name)
	            return f"No data for the selected model: {model_name}", pd.DataFrame()

	        # De-duplicate, then sort: iterating a set of strings directly gives an
	        # order that can change from one interpreter run to the next.
	        merged_models = sorted({model.get("name", "Unknown") for model in models})

	        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
	        if len(merged_models) > 1:
	            result_text += (
	                f"\n\nNote: Results merged from models: {', '.join(merged_models)}"
	            )

	        df = create_and_process_dataframe(data)
	        styled_df = style_dataframe(df)

	        return result_text, styled_df

	    except (KeyError, ValueError, TypeError) as e:
	        # Malformed results data should surface as a message in the UI
	        # instead of breaking the callback; the traceback goes to the log.
	        logging.exception("Error in display_results: %s", e)
	        return f"An error occurred for {model_name}: {e}", pd.DataFrame()


	# Gradio UI: a model dropdown that drives a Markdown summary and a results table.
	with gr.Blocks() as demo:
	    gr.Markdown("# Model Benchmark Results")
	    gr.Markdown(
	        """This table shows the benchmark results for each model. \n\n
	Configurations are default unless noted.\n
	[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
	[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
	[SGLang](https://github.com/sgl-project/sglang),
	[Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
	CPI means cost-performance index and is calculated as tokens per second / instance price."""
	    )
	    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")

	    results_text = gr.Markdown()
	    results_output = gr.DataFrame(label="Results")

	    # Recompute the summary text and the table whenever the selection changes.
	    model_dropdown.change(
	        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
	    )

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()