Julien Simon committed on
Commit
8383fbb
1 Parent(s): 7dff48b

Add cost-performance index (CPI)

Browse files
Files changed (2) hide show
  1. app.py +104 -83
  2. results.py +1 -0
app.py CHANGED
@@ -75,9 +75,100 @@ def custom_sort_key(instance_type):
75
  return (instance_type, 0) # Fallback for non-standard instance types
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def display_results(model_name):
79
  """
80
- Process and display results for a given model.
81
 
82
  Args:
83
  model_name (str): Name of the model to display results for.
@@ -85,7 +176,7 @@ def display_results(model_name):
85
  Returns:
86
  tuple: A tuple containing:
87
  - str: Markdown formatted string with model information.
88
- - pandas.DataFrame: Styled DataFrame with the results.
89
  """
90
  try:
91
  models = get_models_by_architecture(model_name)
@@ -97,113 +188,43 @@ def display_results(model_name):
97
  )
98
 
99
  model_type = models[0].get("modelType", "N/A")
100
- data = []
101
- merged_models = set()
102
-
103
- for model in models:
104
- merged_models.add(model.get("name", "Unknown"))
105
- for config in model.get("configurations", []):
106
- try:
107
- instance_type = config.get("instanceType", "N/A")
108
-
109
- # Fetch cloud, GPU, GPU RAM, and URL information from instance_type_mappings
110
- instance_info = instance_type_mappings.get(instance_type, {})
111
- cloud = instance_info.get("cloud", "N/A")
112
- gpu = instance_info.get("gpu", "N/A")
113
- gpu_ram = instance_info.get("gpuRAM", "N/A")
114
- # url = instance_info.get("url", "")
115
-
116
- if "configurations" in config:
117
- for nested_config in config["configurations"]:
118
- data.append(
119
- {
120
- "Cloud": cloud,
121
- "Instance Type": instance_type,
122
- "GPU": gpu,
123
- "GPU RAM": gpu_ram,
124
- "Status": nested_config.get("status", "N/A"),
125
- "Quantization": nested_config.get(
126
- "quantization", "N/A"
127
- ),
128
- "Container": nested_config.get(
129
- "container",
130
- nested_config.get("tgi", "N/A"),
131
- ),
132
- "Tokens per Second": nested_config.get(
133
- "tokensPerSecond", "N/A"
134
- ),
135
- "Notes": nested_config.get("notes", ""),
136
- }
137
- )
138
- else:
139
- data.append(
140
- {
141
- "Cloud": cloud,
142
- "Instance Type": instance_type,
143
- "GPU": gpu,
144
- "GPU RAM": gpu_ram,
145
- "Status": config.get("status", "N/A"),
146
- "Quantization": config.get("quantization", "N/A"),
147
- "Container": config.get(
148
- "container", config.get("tgi", "N/A")
149
- ),
150
- "Tokens per Second": config.get(
151
- "tokensPerSecond", "N/A"
152
- ),
153
- "Notes": config.get("notes", ""),
154
- }
155
- )
156
- except (KeyError, ValueError, TypeError) as e:
157
- logging.error("Error processing configuration: %s", e)
158
- continue
159
 
160
  if not data:
161
  logging.warning("No data extracted for %s", model_name)
162
- return (
163
- f"No data for the selected model: {model_name}",
164
- pd.DataFrame(),
165
- )
166
 
 
167
  merged_models_message = (
168
  f"Note: Results merged from models: {', '.join(merged_models)}"
169
  if len(merged_models) > 1
170
  else None
171
  )
172
 
173
- sorted_data = sorted(data, key=lambda x: custom_sort_key(x["Instance Type"]))
174
-
175
  result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
176
  if merged_models_message:
177
  result_text += f"\n\n{merged_models_message}"
178
 
179
- df = pd.DataFrame(sorted_data)
180
-
181
- def color_status(val):
182
- if val == "OK":
183
- return "background-color: green; color: white"
184
- if val == "KO":
185
- return "background-color: red; color: white"
186
- return ""
187
-
188
- styled_df = df.style.applymap(color_status, subset=["Status"])
189
 
190
  return result_text, styled_df
191
 
192
  except (KeyError, ValueError, TypeError) as e:
193
  logging.exception("Error in display_results: %s", e)
194
- return (
195
- f"An error occurred for {model_name}: {str(e)}",
196
- pd.DataFrame(),
197
- )
198
 
199
 
200
  with gr.Blocks() as demo:
201
  gr.Markdown("# Model Benchmark Results")
202
  gr.Markdown(
203
  """This table shows the benchmark results for each model. \n\n
204
- Configurations are default unless noted.
205
  [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
206
- [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
 
 
 
207
  )
208
  model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
209
 
 
75
  return (instance_type, 0) # Fallback for non-standard instance types
76
 
77
 
78
+ def process_model_data(models):
79
+ """Process model data and return a list of configurations."""
80
+ data = []
81
+ for model in models:
82
+ for config in model.get("configurations", []):
83
+ process_configuration(config, data)
84
+ return data
85
+
86
+
87
+ def process_configuration(config, data):
88
+ """Process a single configuration and append to data list."""
89
+ instance_type = config.get("instanceType", "N/A")
90
+ instance_info = instance_type_mappings.get(instance_type, {})
91
+ instance_data = {
92
+ "cloud": instance_info.get("cloud", "N/A"),
93
+ "gpu": instance_info.get("gpu", "N/A"),
94
+ "gpu_ram": instance_info.get("gpuRAM", "N/A"),
95
+ "instance_type": instance_type,
96
+ }
97
+
98
+ if "configurations" in config:
99
+ for nested_config in config["configurations"]:
100
+ append_config_data(nested_config, instance_data, data)
101
+ else:
102
+ append_config_data(config, instance_data, data)
103
+
104
+
105
+ def append_config_data(config, instance_data, data):
106
+ """Append configuration data to the data list."""
107
+ data.append(
108
+ {
109
+ "Cloud": instance_data["cloud"],
110
+ "Instance Type": instance_data["instance_type"],
111
+ "GPU": instance_data["gpu"],
112
+ "GPU RAM": instance_data["gpu_ram"],
113
+ "Status": config.get("status", "N/A"),
114
+ "Quantization": config.get("quantization", "N/A"),
115
+ "Container": config.get("container", config.get("tgi", "N/A")),
116
+ "Tokens per Second": config.get("tokensPerSecond", 0),
117
+ "Notes": config.get("notes", ""),
118
+ }
119
+ )
120
+
121
+
122
+ def create_and_process_dataframe(data):
123
+ """Create and process the DataFrame with CPI calculation."""
124
+ df = pd.DataFrame(data)
125
+ df["CPI"] = df.apply(calculate_cpi, axis=1)
126
+ df["CPI"] = pd.to_numeric(df["CPI"], errors="coerce")
127
+ df["Tokens per Second"] = pd.to_numeric(df["Tokens per Second"], errors="coerce")
128
+
129
+ columns = df.columns.tolist()
130
+ tokens_per_second_index = columns.index("Tokens per Second")
131
+ columns.remove("CPI")
132
+ columns.insert(tokens_per_second_index + 1, "CPI")
133
+ df = df[columns]
134
+
135
+ return df.sort_values("CPI", ascending=False, na_position="last")
136
+
137
+
138
+ def calculate_cpi(row):
139
+ """Calculate CPI for a given row."""
140
+ instance_price = instance_type_mappings.get(row["Instance Type"], {}).get(
141
+ "price", 0
142
+ )
143
+ tokens_per_second = row["Tokens per Second"]
144
+
145
+ try:
146
+ tokens_per_second = float(tokens_per_second)
147
+ if tokens_per_second > 0 and instance_price > 0:
148
+ return tokens_per_second / instance_price
149
+ return pd.NA
150
+ except (ValueError, TypeError):
151
+ return pd.NA
152
+
153
+
154
+ def style_dataframe(df):
155
+ """Apply styling to the DataFrame."""
156
+
157
+ def color_status(val):
158
+ if val == "OK":
159
+ return "background-color: green; color: white"
160
+ if val == "KO":
161
+ return "background-color: red; color: white"
162
+ return ""
163
+
164
+ return df.style.map(color_status, subset=["Status"]).format(
165
+ {"CPI": "{:.2f}", "Tokens per Second": "{:.2f}"}, na_rep="N/A"
166
+ )
167
+
168
+
169
  def display_results(model_name):
170
  """
171
+ Process and display results for a given model, including CPI calculation.
172
 
173
  Args:
174
  model_name (str): Name of the model to display results for.
 
176
  Returns:
177
  tuple: A tuple containing:
178
  - str: Markdown formatted string with model information.
179
+ - pandas.DataFrame: Styled DataFrame with the results, including CPI.
180
  """
181
  try:
182
  models = get_models_by_architecture(model_name)
 
188
  )
189
 
190
  model_type = models[0].get("modelType", "N/A")
191
+ data = process_model_data(models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  if not data:
194
  logging.warning("No data extracted for %s", model_name)
195
+ return f"No data for the selected model: {model_name}", pd.DataFrame()
 
 
 
196
 
197
+ merged_models = set(model.get("name", "Unknown") for model in models)
198
  merged_models_message = (
199
  f"Note: Results merged from models: {', '.join(merged_models)}"
200
  if len(merged_models) > 1
201
  else None
202
  )
203
 
 
 
204
  result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
205
  if merged_models_message:
206
  result_text += f"\n\n{merged_models_message}"
207
 
208
+ df = create_and_process_dataframe(data)
209
+ styled_df = style_dataframe(df)
 
 
 
 
 
 
 
 
210
 
211
  return result_text, styled_df
212
 
213
  except (KeyError, ValueError, TypeError) as e:
214
  logging.exception("Error in display_results: %s", e)
215
+ return f"An error occurred for {model_name}: {str(e)}", pd.DataFrame()
 
 
 
216
 
217
 
218
  with gr.Blocks() as demo:
219
  gr.Markdown("# Model Benchmark Results")
220
  gr.Markdown(
221
  """This table shows the benchmark results for each model. \n\n
222
+ Configurations are default unless noted.\n
223
  [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
224
+ [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html),
225
+ [SGLang](https://github.com/sgl-project/sglang),
226
+ [Transformers-NeuronX](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/tnx_user_guide.html).\n\n
227
+ CPI means cost-perfomance index and is calculated as tokens per second / instance price."""
228
  )
229
  model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
230
 
results.py CHANGED
@@ -155,6 +155,7 @@ instance_type_mappings = {
155
  "gpu": "24xNeuronCore v2",
156
  "gpuRAM": "384 GB",
157
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
 
158
  },
159
  }
160
 
 
155
  "gpu": "24xNeuronCore v2",
156
  "gpuRAM": "384 GB",
157
  "url": "https://instances.vantage.sh/aws/ec2/inf2.48xlarge",
158
+ "price": 12.981,
159
  },
160
  }
161