Spaces:
Running
Running
natolambert
committed on
Commit
•
0de05c0
1
Parent(s):
6ce351e
up
Browse files
app.py
CHANGED
@@ -202,10 +202,14 @@ def regex_table(dataframe, regex, filter_button):
|
|
202 |
# Join the list into a single regex pattern with '|' acting as OR
|
203 |
combined_regex = '|'.join(regex_list)
|
204 |
|
|
|
|
|
|
|
205 |
# if filter_button, remove all rows with "ai2" in the model name
|
|
|
206 |
if isinstance(filter_button, list) or isinstance(filter_button, str):
|
207 |
-
if "
|
208 |
-
|
209 |
if "Seq. Classifiers" not in filter_button:
|
210 |
dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
|
211 |
if "DPO" not in filter_button:
|
@@ -220,6 +224,13 @@ def regex_table(dataframe, regex, filter_button):
|
|
220 |
# replace column '' with count/rank
|
221 |
data[''] = np.arange(1, 1 + len(data))
|
222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
# if Score exists, round to 2 decimals
|
224 |
if "Score" in data.columns:
|
225 |
data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
|
@@ -255,8 +266,8 @@ with gr.Blocks(css=custom_css) as app:
|
|
255 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
256 |
placeholder="Model Search (delimit with , )",
|
257 |
show_label=False)
|
258 |
-
model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "
|
259 |
-
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
260 |
label="Model Types",
|
261 |
show_label=False,
|
262 |
# info="Which model types to include.",
|
@@ -270,7 +281,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
270 |
visible=False,
|
271 |
)
|
272 |
rewardbench_table = gr.Dataframe(
|
273 |
-
regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers"]).values,
|
274 |
datatype=col_types_rewardbench_avg,
|
275 |
headers=rewardbench_data_avg.columns.tolist(),
|
276 |
elem_id="rewardbench_dataframe_avg",
|
@@ -280,7 +291,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
280 |
with gr.TabItem("π RewardBench - Detailed"):
|
281 |
with gr.Row():
|
282 |
search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
283 |
-
model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"
|
284 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
285 |
label="Model Types",
|
286 |
show_label=False,
|
@@ -320,7 +331,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
320 |
with gr.TabItem("Prior Test Sets"):
|
321 |
with gr.Row():
|
322 |
search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
323 |
-
model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"
|
324 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
325 |
label="Model Types",
|
326 |
show_label=False,
|
|
|
202 |
# Join the list into a single regex pattern with '|' acting as OR
|
203 |
combined_regex = '|'.join(regex_list)
|
204 |
|
205 |
+
# remove internal ai2 data
|
206 |
+
dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
|
207 |
+
|
208 |
# if filter_button, remove all rows with "ai2" in the model name
|
209 |
+
update_scores = False
|
210 |
if isinstance(filter_button, list) or isinstance(filter_button, str):
|
211 |
+
if "Prior Sets" not in filter_button and 'Prior Sets (0.5 weight)' in dataframe.columns:
|
212 |
+
update_scores = True
|
213 |
if "Seq. Classifiers" not in filter_button:
|
214 |
dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
|
215 |
if "DPO" not in filter_button:
|
|
|
224 |
# replace column '' with count/rank
|
225 |
data[''] = np.arange(1, 1 + len(data))
|
226 |
|
227 |
+
# if update the score to not use prior sets, do so
|
228 |
+
if update_scores:
|
229 |
+
data["Score"] = (data["Chat"] + data["Chat Hard"] + data["Safety"] + data["Reasoning"]) / 4
|
230 |
+
data["Prior Sets (0.5 weight)"] = np.NaN
|
231 |
+
# sort array by Score column
|
232 |
+
data = data.sort_values(by='Score', ascending=False)
|
233 |
+
|
234 |
# if Score exists, round to 2 decimals
|
235 |
if "Score" in data.columns:
|
236 |
data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
|
|
|
266 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
267 |
placeholder="Model Search (delimit with , )",
|
268 |
show_label=False)
|
269 |
+
model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "Prior Sets"],
|
270 |
+
value=["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"],
|
271 |
label="Model Types",
|
272 |
show_label=False,
|
273 |
# info="Which model types to include.",
|
|
|
281 |
visible=False,
|
282 |
)
|
283 |
rewardbench_table = gr.Dataframe(
|
284 |
+
regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"]).values,
|
285 |
datatype=col_types_rewardbench_avg,
|
286 |
headers=rewardbench_data_avg.columns.tolist(),
|
287 |
elem_id="rewardbench_dataframe_avg",
|
|
|
291 |
with gr.TabItem("π RewardBench - Detailed"):
|
292 |
with gr.Row():
|
293 |
search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
294 |
+
model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
|
295 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
296 |
label="Model Types",
|
297 |
show_label=False,
|
|
|
331 |
with gr.TabItem("Prior Test Sets"):
|
332 |
with gr.Row():
|
333 |
search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
334 |
+
model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
|
335 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
336 |
label="Model Types",
|
337 |
show_label=False,
|