Spaces:

arcee-ai
/

Benchmarks

Running

File size: 7,685 Bytes

"""
This module provides functionality for displaying and analyzing model benchmark results.
It includes functions for data processing, sorting, and a Gradio interface for user interaction.
"""

import logging
import re

import gradio as gr
import pandas as pd

from results import instance_type_mappings, results

# Configure the root logger once at import time so that messages of level
# DEBUG and above emitted through the `logging` module are output.
logging.basicConfig(level=logging.DEBUG)


def get_model_names():
    """
    Collect the name of every model in the results data, in sorted order.

    Returns:
        list: Model names sorted in ascending order.
    """
    names = [entry["name"] for entry in results["models"]]
    names.sort()
    return names


def get_models_by_architecture(model_name):
    """
    Find every model whose "modelType" matches that of the named model.

    Args:
        model_name (str): Name of the reference model.

    Returns:
        list: All models (including the reference one) sharing its
            "modelType"; an empty list when no model has that name.
    """
    # Locate the first model carrying the requested name.
    reference = None
    for candidate in results["models"]:
        if candidate["name"] == model_name:
            reference = candidate
            break

    if not reference:
        return []

    # A missing "modelType" is treated as the empty string on both sides.
    architecture = reference.get("modelType", "")
    same_architecture = []
    for candidate in results["models"]:
        if candidate.get("modelType", "") == architecture:
            same_architecture.append(candidate)
    return same_architecture


# Known instance sizes, smallest first; the position in this tuple is the
# size rank used for sorting.
_SIZE_ORDER = (
    "xlarge",
    "2xlarge",
    "4xlarge",
    "8xlarge",
    "12xlarge",
    "16xlarge",
    "24xlarge",
    "48xlarge",
)
_SIZE_RANK = {size: rank for rank, size in enumerate(_SIZE_ORDER)}

# "<family>.<size>", where the family is letters + generation digits and an
# optional letter suffix (e.g. "g5", "inf2", "g6e", "p4d"). The optional
# suffix is required so that families such as "g6e" are not rejected.
_INSTANCE_TYPE_PATTERN = re.compile(r"([a-z]+\d+[a-z]*)\.(\w+)")


def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Instance types of the form "<family>.<size>" are grouped by family and
    ordered by size within the family ("xlarge" < "2xlarge" < ... <
    "48xlarge"), instead of the misleading plain string order in which
    "12xlarge" would come before "2xlarge".

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
            Sizes that are not in the known list get the index following the
            largest known size. Strings that do not look like
            "<family>.<size>" yield (instance_type, 0).
    """
    match = _INSTANCE_TYPE_PATTERN.match(instance_type)
    if match:
        family, size = match.groups()
        # Unknown sizes sort after every known size of the same family.
        return (family, _SIZE_RANK.get(size, len(_SIZE_ORDER)))
    return (instance_type, 0)  # Fallback for non-standard instance types


def _iter_result_rows(config):
    """
    Yield one table row (a dict) per benchmark entry found in a configuration.

    A configuration either carries its measurements directly, or groups
    several measurements under a nested "configurations" list. In both cases
    each row gets the same instance-level columns, looked up in
    instance_type_mappings by the configuration's "instanceType".

    Args:
        config (dict): One entry of a model's "configurations" list.

    Yields:
        dict: A row keyed by the column names shown in the results table.
    """
    instance_type = config.get("instanceType", "N/A")

    # Cloud, GPU and GPU RAM come from the instance type, not from the entry.
    instance_info = instance_type_mappings.get(instance_type, {})
    shared_columns = {
        "Cloud": instance_info.get("cloud", "N/A"),
        "Instance Type": instance_type,
        "GPU": instance_info.get("gpu", "N/A"),
        "GPU RAM": instance_info.get("gpuRAM", "N/A"),
    }

    # Without a nested list, the configuration itself is the single entry.
    entries = config["configurations"] if "configurations" in config else [config]
    for entry in entries:
        yield {
            **shared_columns,
            "Status": entry.get("status", "N/A"),
            "Quantization": entry.get("quantization", "N/A"),
            # The container is read from "container", falling back to "tgi".
            "Container": entry.get("container", entry.get("tgi", "N/A")),
            "Tokens per Second": entry.get("tokensPerSecond", "N/A"),
            "Notes": entry.get("notes", ""),
        }


def display_results(model_name):
    """
    Process and display results for a given model.

    Results of all models sharing the selected model's "modelType" are merged
    into a single table, sorted by instance type.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information, or a
              message explaining why no results are shown.
            - pandas.DataFrame or Styler: The results with the "Status" column
              colored (green for "OK", red for "KO"); an empty DataFrame when
              there is nothing to show or an error occurred.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning("No models found for %s", model_name)
            return (
                f"No results found for the selected model: {model_name}",
                pd.DataFrame(),
            )

        model_type = models[0].get("modelType", "N/A")
        data = []
        # A dict is used as an ordered set so that the names in the "merged"
        # note always appear in the same (first-seen) order.
        merged_models = {}

        for model in models:
            merged_models.setdefault(model.get("name", "Unknown"))
            for config in model.get("configurations", []):
                try:
                    # Rows are appended one at a time so that rows produced
                    # before a malformed nested entry are kept.
                    for row in _iter_result_rows(config):
                        data.append(row)
                except (AttributeError, KeyError, ValueError, TypeError) as e:
                    # AttributeError covers entries that are not dicts at all.
                    logging.error("Error processing configuration: %s", e)
                    continue

        if not data:
            logging.warning("No data extracted for %s", model_name)
            return (
                f"No data for the selected model: {model_name}",
                pd.DataFrame(),
            )

        sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))

        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if len(merged_models) > 1:
            result_text += (
                f"\n\nNote: Results merged from models: {', '.join(merged_models)}"
            )

        df = pd.DataFrame(sorted_data)

        def color_status(val):
            if val == "OK":
                return "background-color: green; color: white"
            if val == "KO":
                return "background-color: red; color: white"
            return ""

        # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
        # name is deprecated; use the new name when it is available.
        styler = df.style
        if hasattr(styler, "map"):
            styled_df = styler.map(color_status, subset=["Status"])
        else:
            styled_df = styler.applymap(color_status, subset=["Status"])

        return result_text, styled_df

    except (KeyError, ValueError, TypeError) as e:
        logging.exception("Error in display_results: %s", e)
        return (
            f"An error occurred for {model_name}: {str(e)}",
            pd.DataFrame(),
        )


# Build the Gradio interface: a title, a short explanation, a model selector,
# and two outputs (Markdown summary + results table) that are refreshed by
# display_results whenever the selected model changes.
with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
        Configurations are default unless noted.
        [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")

    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")

    model_dropdown.change(
        display_results, inputs=[model_dropdown], outputs=[results_text, results_output]
    )

# Launch only when executed as a script, and only after the Blocks context
# has been closed so the interface definition is complete.
if __name__ == "__main__":
    demo.launch()