"""
This module provides functionality for displaying and analyzing model benchmark results.
It includes functions for data processing, sorting, and a Gradio interface for user interaction.
"""
import logging
import re
import gradio as gr
import pandas as pd
from results import instance_type_mappings, results
logging.basicConfig(level=logging.DEBUG)
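
# Expected shape of the imported data (inferred from the lookups below, not a formal schema):
#   results["models"]        -> list of dicts with "name", "modelType" and "configurations"
#   configuration entries    -> "instanceType", an optional nested "configurations" list, plus
#                               "status", "quantization", "container" (or legacy "tgi"),
#                               "tokensPerSecond" and "notes"
#   instance_type_mappings   -> dict keyed by instance type with "cloud", "gpu", "gpuRAM", "url"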


def get_model_names():
    """
    Retrieve a sorted list of model names from the results data.

    Returns:
        list: Sorted list of model names.
    """
    return sorted([model["name"] for model in results["models"]])


def get_models_by_architecture(model_name):
    """
    Retrieve models with the same architecture as the specified model.

    Args:
        model_name (str): Name of the model to match architecture.

    Returns:
        list: List of models with the same architecture.
    """
    selected_model = next(
        (m for m in results["models"] if m["name"] == model_name), None
    )
    if not selected_model:
        return []
    model_type = selected_model.get("modelType", "")
    return [m for m in results["models"] if m.get("modelType", "") == model_type]
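
# Note: grouping is by "modelType", so display_results() below merges rows from all
# checkpoints that share an architecture. Hypothetical example: two entries with
# modelType "llama" would both be returned when either one is selected.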


def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
    """
    size_order = [
        "xlarge",
        "2xlarge",
        "4xlarge",
        "8xlarge",
        "12xlarge",
        "16xlarge",
        "24xlarge",
        "48xlarge",
    ]
    # Split into family and size, e.g. "g5.2xlarge" -> ("g5", "2xlarge");
    # the trailing [a-z]* also accepts families such as "g4dn" or "p4d".
    match = re.match(r"([a-z]+\d+[a-z]*)\.(\w+)", instance_type)
    if match:
        family, size = match.groups()
        return (
            family,
            size_order.index(size) if size in size_order else len(size_order),
        )
    return (instance_type, 0)  # Fallback for non-standard instance types
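
# A quick illustration (instance names here are examples, not taken from the benchmark data):
#   sorted(["g5.12xlarge", "p4d.24xlarge", "g5.xlarge"], key=custom_sort_key)
#   -> ["g5.xlarge", "g5.12xlarge", "p4d.24xlarge"]
# i.e. instances are grouped by family, then ordered by size within each family.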


def display_results(model_name):
    """
    Process and display results for a given model.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information.
            - pandas.DataFrame: Styled DataFrame with the results.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )
        model_type = models[0].get("modelType", "N/A")
        data = []
        merged_models = set()
        for model in models:
            merged_models.add(model.get("name", "Unknown"))
            for config in model.get("configurations", []):
                try:
                    instance_type = config.get("instanceType", "N/A")
                    # Fetch cloud, GPU, GPU RAM, and URL information from instance_type_mappings
                    instance_info = instance_type_mappings.get(instance_type, {})
                    cloud = instance_info.get("cloud", "N/A")
                    gpu = instance_info.get("gpu", "N/A")
                    gpu_ram = instance_info.get("gpuRAM", "N/A")
                    # url = instance_info.get("url", "")
                    if "configurations" in config:
                        for nested_config in config["configurations"]:
                            data.append(
                                {
                                    "Cloud": cloud,
                                    "Instance Type": instance_type,
                                    "GPU": gpu,
                                    "GPU RAM": gpu_ram,
                                    "Status": nested_config.get("status", "N/A"),
                                    "Quantization": nested_config.get(
                                        "quantization", "N/A"
                                    ),
                                    "Container": nested_config.get(
                                        "container",
                                        nested_config.get("tgi", "N/A"),
                                    ),
                                    "Tokens per Second": nested_config.get(
                                        "tokensPerSecond", "N/A"
                                    ),
                                    "Notes": nested_config.get("notes", ""),
                                }
                            )
                    else:
                        data.append(
                            {
                                "Cloud": cloud,
                                "Instance Type": instance_type,
                                "GPU": gpu,
                                "GPU RAM": gpu_ram,
                                "Status": config.get("status", "N/A"),
                                "Quantization": config.get("quantization", "N/A"),
                                "Container": config.get(
                                    "container", config.get("tgi", "N/A")
                                ),
                                "Tokens per Second": config.get(
                                    "tokensPerSecond", "N/A"
                                ),
                                "Notes": config.get("notes", ""),
                            }
                        )
                except (KeyError, ValueError, TypeError) as e:
                    logging.error("Error processing configuration: %s", e)
                    continue
        if not data:
            logging.warning("No data extracted for %s", model_name)
            return (
                f"No data for the selected model: {model_name}",
                pd.DataFrame(),
            )
        merged_models_message = (
            f"Note: Results merged from models: {', '.join(merged_models)}"
            if len(merged_models) > 1
            else None
        )
        sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))
        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if merged_models_message:
            result_text += f"\n\n{merged_models_message}"
        df = pd.DataFrame(sorted_data)

        def color_status(val):
            # Color the Status column: green for passing ("OK"), red for failing ("KO")
            if val == "OK":
                return "background-color: green; color: white"
            if val == "KO":
                return "background-color: red; color: white"
            return ""

        # Styler.applymap was renamed to Styler.map in pandas 2.1; applymap still works
        # but emits a deprecation warning on newer pandas versions.
        styled_df = df.style.applymap(color_status, subset=["Status"])
        return result_text, styled_df
    except (KeyError, ValueError, TypeError) as e:
        logging.exception("Error in display_results: %s", e)
        return (
            f"An error occurred for {model_name}: {str(e)}",
            pd.DataFrame(),
        )


with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
        Configurations are default unless noted.
        [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")
    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()
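
# The app can also be exposed beyond localhost (e.g. when running in a container)
# using standard Gradio launch options, for instance:
#     demo.launch(server_name="0.0.0.0", server_port=7860)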