""" This module provides functionality for displaying and analyzing model benchmark results. It includes functions for data processing, sorting, and a Gradio interface for user interaction. """ import logging import re import gradio as gr import pandas as pd from results import instance_type_mappings, results logging.basicConfig(level=logging.DEBUG) def get_model_names(): """ Retrieve a sorted list of model names from the results data. Returns: list: Sorted list of model names. """ return sorted([model["name"] for model in results["models"]]) def get_models_by_architecture(model_name): """ Retrieve models with the same architecture as the specified model. Args: model_name (str): Name of the model to match architecture. Returns: list: List of models with the same architecture. """ selected_model = next( (m for m in results["models"] if m["name"] == model_name), None ) if not selected_model: return [] model_type = selected_model.get("modelType", "") return [m for m in results["models"] if m.get("modelType", "") == model_type] def custom_sort_key(instance_type): """ Generate a custom sorting key for instance types. Args: instance_type (str): The instance type to generate a key for. Returns: tuple: A tuple used for sorting, containing (family, size_index). """ size_order = [ "xlarge", "2xlarge", "4xlarge", "8xlarge", "12xlarge", "16xlarge", "24xlarge", "48xlarge", ] match = re.match(r"([a-z]+\d+)\.(\w+)", instance_type) if match: family, size = match.groups() return ( family, size_order.index(size) if size in size_order else len(size_order), ) return (instance_type, 0) # Fallback for non-standard instance types def process_model_data(models): """Process model data and return a list of configurations.""" data = [] for model in models: for config in model.get("configurations", []): process_configuration(config, data) return data def process_configuration(config, data): """Process a single configuration and append to data list.""" instance_type = config.get("instanceType", "N/A") instance_info = instance_type_mappings.get(instance_type, {}) instance_data = { "cloud": instance_info.get("cloud", "N/A"), "gpu": instance_info.get("gpu", "N/A"), "gpu_ram": instance_info.get("gpuRAM", "N/A"), "instance_type": instance_type, } if "configurations" in config: for nested_config in config["configurations"]: append_config_data(nested_config, instance_data, data) else: append_config_data(config, instance_data, data) def append_config_data(config, instance_data, data): """Append configuration data to the data list.""" data.append( { "Cloud": instance_data["cloud"], "Instance Type": instance_data["instance_type"], "GPU": instance_data["gpu"], "GPU RAM": instance_data["gpu_ram"], "Status": config.get("status", "N/A"), "Quantization": config.get("quantization", "N/A"), "Container": config.get("container", config.get("tgi", "N/A")), "Tokens per Second": config.get("tokensPerSecond", 0), "Notes": config.get("notes", ""), } ) def create_and_process_dataframe(data): """Create and process the DataFrame with CPI calculation.""" df = pd.DataFrame(data) df["CPI"] = df.apply(calculate_cpi, axis=1) df["CPI"] = pd.to_numeric(df["CPI"], errors="coerce") df["Tokens per Second"] = pd.to_numeric(df["Tokens per Second"], errors="coerce") columns = df.columns.tolist() tokens_per_second_index = columns.index("Tokens per Second") columns.remove("CPI") columns.insert(tokens_per_second_index + 1, "CPI") df = df[columns] return df.sort_values("CPI", ascending=False, na_position="last") def calculate_cpi(row): """Calculate CPI for a given row.""" instance_price = instance_type_mappings.get(row["Instance Type"], {}).get( "price", 0 ) tokens_per_second = row["Tokens per Second"] try: tokens_per_second = float(tokens_per_second) if tokens_per_second > 0 and instance_price > 0: return tokens_per_second / instance_price return pd.NA except (ValueError, TypeError): return pd.NA def style_dataframe(df): """Apply styling to the DataFrame.""" def color_status(val): if val == "OK": return "background-color: green; color: white" if val == "KO": return "background-color: red; color: white" return "" return df.style.map(color_status, subset=["Status"]).format( {"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A" ) def display_results(model_name): """ Process and display results for a given model, including CPI calculation. Args: model_name (str): Name of the model to display results for. Returns: tuple: A tuple containing: - str: Markdown formatted string with model information. - pandas.DataFrame: Styled DataFrame with the results, including CPI. """ try: models = get_models_by_architecture(model_name) if not models: logging.warning("No models found for %s", model_name) return ( f"No results found for the selected model: {model_name}", pd.DataFrame(), ) model_type = models[0].get("modelType", "N/A") data = process_model_data(models) if not data: logging.warning("No data extracted for %s", model_name) return f"No data for the selected model: {model_name}", pd.DataFrame() merged_models = set(model.get("name", "Unknown") for model in models) merged_models_message = ( f"Note: Results merged from models: {', '.join(merged_models)}" if len(merged_models) > 1 else None ) result_text = f"## Results for {model_name}\n\nModel Type: {model_type}" if merged_models_message: result_text += f"\n\n{merged_models_message}" df = create_and_process_dataframe(data) styled_df = style_dataframe(df) return result_text, styled_df except (KeyError, ValueError, TypeError) as e: logging.exception("Error in display_results: %s", e) return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame() with gr.Blocks() as demo: gr.Markdown("# Model Benchmark Results") gr.Markdown( """This table shows the benchmark results for each model. \n\n Configurations are default unless noted.\n [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher), [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html), [SGLang](https://github.com/sgl-project/sglang), [Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n CPI means cost-perfomance index and is calculated as tokens per second / instance price.""" ) model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model") results_text = gr.Markdown() results_output = gr.DataFrame(label="Results") model_dropdown.change( display_results, inputs=[model_dropdown], outputs=[results_text, results_output] ) if __name__ == "__main__": demo.launch()