Julien Simon committed on
Commit
8383fbb
1 Parent(s): 7dff48b

Add cost-performance index (CPI)

Browse files
Files changed (2) hide show
  1. app.py +104 -83
  2. results.py +1 -0
app.py CHANGED
@@ -75,9 +75,100 @@ def custom_sort_key(instance_type):
75
  return (instance_type, 0) # Fallback for non-standard instance types
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def display_results(model_name):
79
  """
80
- Process and display results for a given model.
81
 
82
  Args:
83
  model_name (str): Name of the model to display results for.
@@ -85,7 +176,7 @@ def display_results(model_name):
85
  Returns:
86
  tuple: A tuple containing:
87
  - str: Markdown formatted string with model information.
88
- - pandas.DataFrame: Styled DataFrame with the results.
89
  """
90
  try:
91
  models = get_models_by_architecture(model_name)
@@ -97,113 +188,43 @@ def display_results(model_name):
97
  )
98
 
99
  model_type = models[0].get("modelType", "N/A")
100
- data = []
101
- merged_models = set()
102
-
103
- for model in models:
104
- merged_models.add(model.get("name", "Unknown"))
105
- for config in model.get("configurations", []):
106
- try:
107
- instance_type = config.get("instanceType", "N/A")
108
-
109
- # Fetch cloud, GPU, GPU RAM, and URL information from instance_type_mappings
110
- instance_info = instance_type_mappings.get(instance_type, {})
111
- cloud = instance_info.get("cloud", "N/A")
112
- gpu = instance_info.get("gpu", "N/A")
113
- gpu_ram = instance_info.get("gpuRAM", "N/A")
114
- # url = instance_info.get("url", "")
115
-
116
- if "configurations" in config:
117
- for nested_config in config["configurations"]:
118
- data.append(
119
- {
120
- "Cloud": cloud,
121
- "Instance Type": instance_type,
122
- "GPU": gpu,
123
- "GPU RAM": gpu_ram,
124
- "Status": nested_config.get("status", "N/A"),
125
- "Quantization": nested_config.get(
126
- "quantization", "N/A"
127
- ),
128
- "Container": nested_config.get(
129
- "container",
130
- nested_config.get("tgi", "N/A"),
131
- ),
132
- "Tokens per Second": nested_config.get(
133
- "tokensPerSecond", "N/A"
134
- ),
135
- "Notes": nested_config.get("notes", ""),
136
- }
137
- )
138
- else:
139
- data.append(
140
- {
141
- "Cloud": cloud,
142
- "Instance Type": instance_type,
143
- "GPU": gpu,
144
- "GPU RAM": gpu_ram,
145
- "Status": config.get("status", "N/A"),
146
- "Quantization": config.get("quantization", "N/A"),
147
- "Container": config.get(
148
- "container", config.get("tgi", "N/A")
149
- ),
150
- "Tokens per Second": config.get(
151
- "tokensPerSecond", "N/A"
152
- ),
153
- "Notes": config.get("notes", ""),
154
- }
155
- )
156
- except (KeyError, ValueError, TypeError) as e:
157
- logging.error("Error processing configuration: %s", e)
158
- continue
159
 
160
  if not data:
161
  logging.warning("No data extracted for %s", model_name)
162
- return (
163
- f"No data for the selected model: {model_name}",
164
- pd.DataFrame(),
165
- )
166
 
 
167
  merged_models_message = (
168
  f"Note: Results merged from models: {', '.join(merged_models)}"
169
  if len(merged_models) > 1
170
  else None
171
  )
172
 
173
- sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))
174
-
175
  result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
176
  if merged_models_message:
177
  result_text += f"\n\n{merged_models_message}"
178
 
179
- df = pd.DataFrame(sorted_data)
180
-
181
- def color_status(val):
182
- if val == "OK":
183
- return "background-color: green; color: white"
184
- if val == "KO":
185
- return "background-color: red; color: white"
186
- return ""
187
-
188
- styled_df = df.style.applymap(color_status, subset=["Status"])
189
 
190
  return result_text, styled_df
191
 
192
  except (KeyError, ValueError, TypeError) as e:
193
  logging.exception("Error in display_results: %s", e)
194
- return (
195
- f"An error occurred for {model_name}: {str(e)}",
196
- pd.DataFrame(),
197
- )
198
 
199
 
200
  with gr.Blocks() as demo:
201
  gr.Markdown("# Model Benchmark Results")
202
  gr.Markdown(
203
  """This table shows the benchmark results for each model. \n\n
204
- Configurations are default unless noted.
205
  [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
206
- [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
 
 
 
207
  )
208
  model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
209
 
 
75
  return (instance_type, 0) # Fallback for non-standard instance types
76
 
77
 
78
+ def process_model_data(models):
79
+ """Process model data and return a list of configurations."""
80
+ data = []
81
+ for model in models:
82
+ for config in model.get("configurations", []):
83
+ process_configuration(config, data)
84
+ return data
85
+
86
+
87
+ def process_configuration(config, data):
88
+ """Process a single configuration and append to data list."""
89
+ instance_type = config.get("instanceType", "N/A")
90
+ instance_info = instance_type_mappings.get(instance_type, {})
91
+ instance_data = {
92
+ "cloud": instance_info.get("cloud", "N/A"),
93
+ "gpu": instance_info.get("gpu", "N/A"),
94
+ "gpu_ram": instance_info.get("gpuRAM", "N/A"),
95
+ "instance_type": instance_type,
96
+ }
97
+
98
+ if "configurations" in config:
99
+ for nested_config in config["configurations"]:
100
+ append_config_data(nested_config, instance_data, data)
101
+ else:
102
+ append_config_data(config, instance_data, data)
103
+
104
+
105
+ def append_config_data(config, instance_data, data):
106
+ """Append configuration data to the data list."""
107
+ data.append(
108
+ {
109
+ "Cloud": instance_data["cloud"],
110
+ "Instance Type": instance_data["instance_type"],
111
+ "GPU": instance_data["gpu"],
112
+ "GPU RAM": instance_data["gpu_ram"],
113
+ "Status": config.get("status", "N/A"),
114
+ "Quantization": config.get("quantization", "N/A"),
115
+ "Container": config.get("container", config.get("tgi", "N/A")),
116
+ "Tokens per Second": config.get("tokensPerSecond", 0),
117
+ "Notes": config.get("notes", ""),
118
+ }
119
+ )
120
+
121
+
122
+ def create_and_process_dataframe(data):
123
+ """Create and process the DataFrame with CPI calculation."""
124
+ df = pd.DataFrame(data)
125
+ df["CPI"] = df.apply(calculate_cpi, axis=1)
126
+ df["CPI"] = pd.to_numeric(df["CPI"], errors="coerce")
127
+ df["Tokens per Second"] = pd.to_numeric(df["Tokens per Second"], errors="coerce")
128
+
129
+ columns = df.columns.tolist()
130
+ tokens_per_second_index = columns.index("Tokens per Second")
131
+ columns.remove("CPI")
132
+ columns.insert(tokens_per_second_index + 1, "CPI")
133
+ df = df[columns]
134
+
135
+ return df.sort_values("CPI", ascending=False, na_position="last")
136
+
137
+
138
+ def calculate_cpi(row):
139
+ """Calculate CPI for a given row."""
140
+ instance_price = instance_type_mappings.get(row["Instance Type"], {}).get(
141
+ "price", 0
142
+ )
143
+ tokens_per_second = row["Tokens per Second"]
144
+
145
+ try:
146
+ tokens_per_second = float(tokens_per_second)
147
+ if tokens_per_second > 0 and instance_price > 0:
148
+ return tokens_per_second / instance_price
149
+ return pd.NA
150
+ except (ValueError, TypeError):
151
+ return pd.NA
152
+
153
+
154
+ def style_dataframe(df):
155
+ """Apply styling to the DataFrame."""
156
+
157
+ def color_status(val):
158
+ if val == "OK":
159
+ return "background-color: green; color: white"
160
+ if val == "KO":
161
+ return "background-color: red; color: white"
162
+ return ""
163
+
164
+ return df.style.map(color_status, subset=["Status"]).format(
165
+ {"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A"
166
+ )
167
+
168
+
169
  def display_results(model_name):
170
  """
171
+ Process and display results for a given model, including CPI calculation.
172
 
173
  Args:
174
  model_name (str): Name of the model to display results for.
 
176
  Returns:
177
  tuple: A tuple containing:
178
  - str: Markdown formatted string with model information.
179
+ - pandas.DataFrame: Styled DataFrame with the results, including CPI.
180
  """
181
  try:
182
  models = get_models_by_architecture(model_name)
 
188
  )
189
 
190
  model_type = models[0].get("modelType", "N/A")
191
+ data = process_model_data(models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  if not data:
194
  logging.warning("No data extracted for %s", model_name)
195
+ return f"No data for the selected model: {model_name}", pd.DataFrame()
 
 
 
196
 
197
+ merged_models = set(model.get("name", "Unknown") for model in models)
198
  merged_models_message = (
199
  f"Note: Results merged from models: {', '.join(merged_models)}"
200
  if len(merged_models) > 1
201
  else None
202
  )
203
 
 
 
204
  result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
205
  if merged_models_message:
206
  result_text += f"\n\n{merged_models_message}"
207
 
208
+ df = create_and_process_dataframe(data)
209
+ styled_df = style_dataframe(df)
 
 
 
 
 
 
 
 
210
 
211
  return result_text, styled_df
212
 
213
  except (KeyError, ValueError, TypeError) as e:
214
  logging.exception("Error in display_results: %s", e)
215
+ return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame()
 
 
 
216
 
217
 
218
  with gr.Blocks() as demo:
219
  gr.Markdown("# Model Benchmark Results")
220
  gr.Markdown(
221
  """This table shows the benchmark results for each model. \n\n
222
+ Configurations are default unless noted.\n
223
  [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
224
+ [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
225
+ [SGLang](https://github.com/sgl-project/sglang),
226
+ [Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
227
+ CPI means cost-perfomance index and is calculated as tokens per second / instance price."""
228
  )
229
  model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
230
 
results.py CHANGED
@@ -155,6 +155,7 @@ instance_type_mappings = {
155
  "gpu": "24xNeuronCore v2",
156
  "gpuRAM": "384 GB",
157
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
 
158
  },
159
  }
160
 
 
155
  "gpu": "24xNeuronCore v2",
156
  "gpuRAM": "384 GB",
157
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
158
+ "price": 12.981,
159
  },
160
  }
161