Spaces:
Running
Running
Julien Simon
committed on
Commit
•
8383fbb
1
Parent(s):
7dff48b
Add cost-performance index (CPI)
Browse files- app.py +104 -83
- results.py +1 -0
app.py
CHANGED
@@ -75,9 +75,100 @@ def custom_sort_key(instance_type):
|
|
75 |
return (instance_type, 0) # Fallback for non-standard instance types
|
76 |
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
def display_results(model_name):
|
79 |
"""
|
80 |
-
Process and display results for a given model.
|
81 |
|
82 |
Args:
|
83 |
model_name (str): Name of the model to display results for.
|
@@ -85,7 +176,7 @@ def display_results(model_name):
|
|
85 |
Returns:
|
86 |
tuple: A tuple containing:
|
87 |
- str: Markdown formatted string with model information.
|
88 |
-
- pandas.DataFrame: Styled DataFrame with the results.
|
89 |
"""
|
90 |
try:
|
91 |
models = get_models_by_architecture(model_name)
|
@@ -97,113 +188,43 @@ def display_results(model_name):
|
|
97 |
)
|
98 |
|
99 |
model_type = models[0].get("modelType", "N/A")
|
100 |
-
data =
|
101 |
-
merged_models = set()
|
102 |
-
|
103 |
-
for model in models:
|
104 |
-
merged_models.add(model.get("name", "Unknown"))
|
105 |
-
for config in model.get("configurations", []):
|
106 |
-
try:
|
107 |
-
instance_type = config.get("instanceType", "N/A")
|
108 |
-
|
109 |
-
# Fetch cloud, GPU, GPU RAM, and URL information from instance_type_mappings
|
110 |
-
instance_info = instance_type_mappings.get(instance_type, {})
|
111 |
-
cloud = instance_info.get("cloud", "N/A")
|
112 |
-
gpu = instance_info.get("gpu", "N/A")
|
113 |
-
gpu_ram = instance_info.get("gpuRAM", "N/A")
|
114 |
-
# url = instance_info.get("url", "")
|
115 |
-
|
116 |
-
if "configurations" in config:
|
117 |
-
for nested_config in config["configurations"]:
|
118 |
-
data.append(
|
119 |
-
{
|
120 |
-
"Cloud": cloud,
|
121 |
-
"Instance Type": instance_type,
|
122 |
-
"GPU": gpu,
|
123 |
-
"GPU RAM": gpu_ram,
|
124 |
-
"Status": nested_config.get("status", "N/A"),
|
125 |
-
"Quantization": nested_config.get(
|
126 |
-
"quantization", "N/A"
|
127 |
-
),
|
128 |
-
"Container": nested_config.get(
|
129 |
-
"container",
|
130 |
-
nested_config.get("tgi", "N/A"),
|
131 |
-
),
|
132 |
-
"Tokens per Second": nested_config.get(
|
133 |
-
"tokensPerSecond", "N/A"
|
134 |
-
),
|
135 |
-
"Notes": nested_config.get("notes", ""),
|
136 |
-
}
|
137 |
-
)
|
138 |
-
else:
|
139 |
-
data.append(
|
140 |
-
{
|
141 |
-
"Cloud": cloud,
|
142 |
-
"Instance Type": instance_type,
|
143 |
-
"GPU": gpu,
|
144 |
-
"GPU RAM": gpu_ram,
|
145 |
-
"Status": config.get("status", "N/A"),
|
146 |
-
"Quantization": config.get("quantization", "N/A"),
|
147 |
-
"Container": config.get(
|
148 |
-
"container", config.get("tgi", "N/A")
|
149 |
-
),
|
150 |
-
"Tokens per Second": config.get(
|
151 |
-
"tokensPerSecond", "N/A"
|
152 |
-
),
|
153 |
-
"Notes": config.get("notes", ""),
|
154 |
-
}
|
155 |
-
)
|
156 |
-
except (KeyError, ValueError, TypeError) as e:
|
157 |
-
logging.error("Error processing configuration: %s", e)
|
158 |
-
continue
|
159 |
|
160 |
if not data:
|
161 |
logging.warning("No data extracted for %s", model_name)
|
162 |
-
return (
|
163 |
-
f"No data for the selected model: {model_name}",
|
164 |
-
pd.DataFrame(),
|
165 |
-
)
|
166 |
|
|
|
167 |
merged_models_message = (
|
168 |
f"Note: Results merged from models: {', '.join(merged_models)}"
|
169 |
if len(merged_models) > 1
|
170 |
else None
|
171 |
)
|
172 |
|
173 |
-
sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))
|
174 |
-
|
175 |
result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
|
176 |
if merged_models_message:
|
177 |
result_text += f"\n\n{merged_models_message}"
|
178 |
|
179 |
-
df =
|
180 |
-
|
181 |
-
def color_status(val):
|
182 |
-
if val == "OK":
|
183 |
-
return "background-color: green; color: white"
|
184 |
-
if val == "KO":
|
185 |
-
return "background-color: red; color: white"
|
186 |
-
return ""
|
187 |
-
|
188 |
-
styled_df = df.style.applymap(color_status, subset=["Status"])
|
189 |
|
190 |
return result_text, styled_df
|
191 |
|
192 |
except (KeyError, ValueError, TypeError) as e:
|
193 |
logging.exception("Error in display_results: %s", e)
|
194 |
-
return (
|
195 |
-
f"An error occurred for {model_name}: {str(e)}",
|
196 |
-
pd.DataFrame(),
|
197 |
-
)
|
198 |
|
199 |
|
200 |
with gr.Blocks() as demo:
|
201 |
gr.Markdown("# Model Benchmark Results")
|
202 |
gr.Markdown(
|
203 |
"""This table shows the benchmark results for each model. \n\n
|
204 |
-
Configurations are default unless noted
|
205 |
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
|
206 |
-
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)
|
|
|
|
|
|
|
207 |
)
|
208 |
model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
|
209 |
|
|
|
75 |
return (instance_type, 0) # Fallback for non-standard instance types
|
76 |
|
77 |
|
78 |
+
def process_model_data(models):
    """Flatten every configuration of every model into a list of result rows.

    Args:
        models (list[dict]): Model records, each optionally carrying a
            "configurations" list.

    Returns:
        list[dict]: One row dict per (possibly nested) configuration,
        produced by process_configuration.
    """
    rows = []
    for entry in models:
        for cfg in entry.get("configurations", []):
            process_configuration(cfg, rows)
    return rows
85 |
+
|
86 |
+
|
87 |
+
def process_configuration(config, data):
    """Append the row(s) derived from one configuration entry to *data*.

    Resolves cloud/GPU metadata for the configuration's instance type from
    the module-level instance_type_mappings table, then dispatches either
    each nested configuration (when a "configurations" key is present) or
    the configuration itself to append_config_data.

    Args:
        config (dict): A single configuration record.
        data (list): Accumulator list that receives the row dicts.
    """
    itype = config.get("instanceType", "N/A")
    # Per-instance-type metadata (cloud provider, GPU model, GPU RAM).
    meta = instance_type_mappings.get(itype, {})
    instance_data = {
        "cloud": meta.get("cloud", "N/A"),
        "gpu": meta.get("gpu", "N/A"),
        "gpu_ram": meta.get("gpuRAM", "N/A"),
        "instance_type": itype,
    }

    if "configurations" in config:
        for sub in config["configurations"]:
            append_config_data(sub, instance_data, data)
    else:
        append_config_data(config, instance_data, data)
103 |
+
|
104 |
+
|
105 |
+
def append_config_data(config, instance_data, data):
    """Build one display row from *config* plus *instance_data* and append it.

    Args:
        config (dict): Benchmark configuration (status, quantization,
            container, tokensPerSecond, notes).
        instance_data (dict): Pre-resolved instance metadata with keys
            "cloud", "gpu", "gpu_ram" and "instance_type".
        data (list): Accumulator the new row dict is appended to.
    """
    row = {
        "Cloud": instance_data["cloud"],
        "Instance Type": instance_data["instance_type"],
        "GPU": instance_data["gpu"],
        "GPU RAM": instance_data["gpu_ram"],
        "Status": config.get("status", "N/A"),
        "Quantization": config.get("quantization", "N/A"),
        # Older records stored the container image under the legacy "tgi" key.
        "Container": config.get("container", config.get("tgi", "N/A")),
        "Tokens per Second": config.get("tokensPerSecond", 0),
        "Notes": config.get("notes", ""),
    }
    data.append(row)
120 |
+
|
121 |
+
|
122 |
+
def create_and_process_dataframe(data):
    """Build the results DataFrame, add the CPI column, and sort by it.

    CPI (cost-performance index) is computed per row via calculate_cpi,
    coerced to numeric, placed directly after the "Tokens per Second"
    column, and used as the (descending) sort key with missing values last.

    Args:
        data (list[dict]): Row dicts produced by process_model_data.

    Returns:
        pandas.DataFrame: Processed frame sorted by CPI (descending).
    """
    df = pd.DataFrame(data)
    df["CPI"] = df.apply(calculate_cpi, axis=1)
    # Coerce so that "N/A"-style values become NaN for sorting/formatting.
    df["CPI"] = pd.to_numeric(df["CPI"], errors="coerce")
    df["Tokens per Second"] = pd.to_numeric(df["Tokens per Second"], errors="coerce")

    # Reposition CPI right after "Tokens per Second". Remove CPI *before*
    # computing the insertion index so the index stays correct regardless of
    # where CPI currently sits in the column order (the previous code
    # computed the index first, which only worked because CPI was last).
    columns = df.columns.tolist()
    columns.remove("CPI")
    columns.insert(columns.index("Tokens per Second") + 1, "CPI")
    df = df[columns]

    return df.sort_values("CPI", ascending=False, na_position="last")
136 |
+
|
137 |
+
|
138 |
+
def calculate_cpi(row):
    """Compute the cost-performance index (CPI) for one result row.

    CPI = tokens per second / hourly instance price, where the price comes
    from the module-level instance_type_mappings table.

    Args:
        row (pandas.Series | dict): Result row with "Instance Type" and
            "Tokens per Second" fields.

    Returns:
        float | pandas.NA: The CPI, or NA when price or throughput is
        missing, non-positive, or not interpretable as a number.
    """
    price = instance_type_mappings.get(row["Instance Type"], {}).get("price", 0)
    try:
        # Both the float() conversion and the comparisons may raise for
        # non-numeric values; either way the row simply gets no CPI.
        throughput = float(row["Tokens per Second"])
        if throughput > 0 and price > 0:
            return throughput / price
    except (ValueError, TypeError):
        pass
    return pd.NA
|
152 |
+
|
153 |
+
|
154 |
+
def style_dataframe(df):
|
155 |
+
"""Apply styling to the DataFrame."""
|
156 |
+
|
157 |
+
def color_status(val):
|
158 |
+
if val == "OK":
|
159 |
+
return "background-color: green; color: white"
|
160 |
+
if val == "KO":
|
161 |
+
return "background-color: red; color: white"
|
162 |
+
return ""
|
163 |
+
|
164 |
+
return df.style.map(color_status, subset=["Status"]).format(
|
165 |
+
{"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A"
|
166 |
+
)
|
167 |
+
|
168 |
+
|
169 |
def display_results(model_name):
|
170 |
"""
|
171 |
+
Process and display results for a given model, including CPI calculation.
|
172 |
|
173 |
Args:
|
174 |
model_name (str): Name of the model to display results for.
|
|
|
176 |
Returns:
|
177 |
tuple: A tuple containing:
|
178 |
- str: Markdown formatted string with model information.
|
179 |
+
- pandas.DataFrame: Styled DataFrame with the results, including CPI.
|
180 |
"""
|
181 |
try:
|
182 |
models = get_models_by_architecture(model_name)
|
|
|
188 |
)
|
189 |
|
190 |
model_type = models[0].get("modelType", "N/A")
|
191 |
+
data = process_model_data(models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
if not data:
|
194 |
logging.warning("No data extracted for %s", model_name)
|
195 |
+
return f"No data for the selected model: {model_name}", pd.DataFrame()
|
|
|
|
|
|
|
196 |
|
197 |
+
merged_models = set(model.get("name", "Unknown") for model in models)
|
198 |
merged_models_message = (
|
199 |
f"Note: Results merged from models: {', '.join(merged_models)}"
|
200 |
if len(merged_models) > 1
|
201 |
else None
|
202 |
)
|
203 |
|
|
|
|
|
204 |
result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
|
205 |
if merged_models_message:
|
206 |
result_text += f"\n\n{merged_models_message}"
|
207 |
|
208 |
+
df = create_and_process_dataframe(data)
|
209 |
+
styled_df = style_dataframe(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
return result_text, styled_df
|
212 |
|
213 |
except (KeyError, ValueError, TypeError) as e:
|
214 |
logging.exception("Error in display_results: %s", e)
|
215 |
+
return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame()
|
|
|
|
|
|
|
216 |
|
217 |
|
218 |
with gr.Blocks() as demo:
|
219 |
gr.Markdown("# Model Benchmark Results")
|
220 |
gr.Markdown(
|
221 |
"""This table shows the benchmark results for each model. \n\n
|
222 |
+
Configurations are default unless noted.\n
|
223 |
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
|
224 |
+
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
|
225 |
+
[SGLang](https://github.com/sgl-project/sglang),
|
226 |
+
[Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
|
227 |
+
CPI means cost-perfomance index and is calculated as tokens per second / instance price."""
|
228 |
)
|
229 |
model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
|
230 |
|
results.py
CHANGED
@@ -155,6 +155,7 @@ instance_type_mappings = {
|
|
155 |
"gpu": "24xNeuronCore v2",
|
156 |
"gpuRAM": "384 GB",
|
157 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
|
|
|
158 |
},
|
159 |
}
|
160 |
|
|
|
155 |
"gpu": "24xNeuronCore v2",
|
156 |
"gpuRAM": "384 GB",
|
157 |
"url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
|
158 |
+
"price": 12.981,
|
159 |
},
|
160 |
}
|
161 |
|