Adam Jirkovsky
committed on
Commit
·
5c750e7
1
Parent(s):
0a6f522
Fix filtering issues
Browse files
- app.py +16 -42
- src/display/utils.py +2 -2
- src/populate.py +1 -1
app.py
CHANGED
@@ -92,9 +92,10 @@ def update_table(
|
|
92 |
columns: list,
|
93 |
query: str,
|
94 |
):
|
95 |
-
columns += "
|
96 |
#filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
97 |
filtered_df = filter_queries(query, hidden_df)
|
|
|
98 |
df = select_columns(filtered_df, columns)
|
99 |
return df
|
100 |
|
@@ -183,7 +184,7 @@ def validate_captcha(input, text, img):
|
|
183 |
|
184 |
|
185 |
|
186 |
-
demo = gr.Blocks(css=custom_css)
|
187 |
with demo:
|
188 |
gr.HTML(TITLE)
|
189 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
@@ -203,7 +204,7 @@ with demo:
|
|
203 |
choices=[
|
204 |
c.name
|
205 |
for c in fields(AutoEvalColumn)
|
206 |
-
if not c.hidden and not c.never_hidden and not c.dummy
|
207 |
],
|
208 |
value=[
|
209 |
c.name
|
@@ -214,31 +215,7 @@ with demo:
|
|
214 |
elem_id="column-select",
|
215 |
interactive=True,
|
216 |
)
|
217 |
-
|
218 |
-
with gr.Column(min_width=320):
|
219 |
-
# with gr.Box(elem_id="box-filter"):
|
220 |
-
filter_columns_type = gr.CheckboxGroup(
|
221 |
-
label="Model types",
|
222 |
-
choices=[t.to_str() for t in ModelType],
|
223 |
-
value=[t.to_str() for t in ModelType],
|
224 |
-
interactive=True,
|
225 |
-
elem_id="filter-columns-type",
|
226 |
-
)
|
227 |
-
filter_columns_precision = gr.CheckboxGroup(
|
228 |
-
label="Precision",
|
229 |
-
choices=[i.value.name for i in Precision],
|
230 |
-
value=[i.value.name for i in Precision],
|
231 |
-
interactive=True,
|
232 |
-
elem_id="filter-columns-precision",
|
233 |
-
)
|
234 |
-
filter_columns_size = gr.CheckboxGroup(
|
235 |
-
label="Model sizes (in billions of parameters)",
|
236 |
-
choices=list(NUMERIC_INTERVALS.keys()),
|
237 |
-
value=list(NUMERIC_INTERVALS.keys()),
|
238 |
-
interactive=True,
|
239 |
-
elem_id="filter-columns-size",
|
240 |
-
)
|
241 |
-
"""
|
242 |
gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
|
243 |
leaderboard_table = gr.Dataframe(
|
244 |
value=leaderboard_df[
|
@@ -268,22 +245,19 @@ with demo:
|
|
268 |
],
|
269 |
leaderboard_table,
|
270 |
)
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
leaderboard_table,
|
282 |
-
queue=True,
|
283 |
-
)
|
284 |
|
285 |
model_num = len(original_df)
|
286 |
-
graph_df = original_df.drop(columns=["
|
287 |
graph_ax = graph_df.plot(
|
288 |
kind="barh",
|
289 |
title="Graphical performance comparison",
|
|
|
92 |
columns: list,
|
93 |
query: str,
|
94 |
):
|
95 |
+
columns += " " # The dataframe does not display the last column - BUG in gradio?
|
96 |
#filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
97 |
filtered_df = filter_queries(query, hidden_df)
|
98 |
+
filtered_df["Model"] = filtered_df.apply(add_model_hyperlinks, axis=1)
|
99 |
df = select_columns(filtered_df, columns)
|
100 |
return df
|
101 |
|
|
|
184 |
|
185 |
|
186 |
|
187 |
+
demo = gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.orange))
|
188 |
with demo:
|
189 |
gr.HTML(TITLE)
|
190 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
204 |
choices=[
|
205 |
c.name
|
206 |
for c in fields(AutoEvalColumn)
|
207 |
+
if not c.hidden and not c.never_hidden # and not c.dummy # Causes errors
|
208 |
],
|
209 |
value=[
|
210 |
c.name
|
|
|
215 |
elem_id="column-select",
|
216 |
interactive=True,
|
217 |
)
|
218 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
gr.Markdown(TABLE_DESC, elem_classes="markdown-text")
|
220 |
leaderboard_table = gr.Dataframe(
|
221 |
value=leaderboard_df[
|
|
|
245 |
],
|
246 |
leaderboard_table,
|
247 |
)
|
248 |
+
shown_columns.change(
|
249 |
+
update_table,
|
250 |
+
[
|
251 |
+
hidden_leaderboard_table_for_search,
|
252 |
+
shown_columns,
|
253 |
+
search_bar,
|
254 |
+
],
|
255 |
+
leaderboard_table,
|
256 |
+
queue=True,
|
257 |
+
)
|
|
|
|
|
|
|
258 |
|
259 |
model_num = len(original_df)
|
260 |
+
graph_df = original_df.drop(columns=[" ", "Precision", "Model URL"]).set_index("Model").T
|
261 |
graph_ax = graph_df.plot(
|
262 |
kind="barh",
|
263 |
title="Graphical performance comparison",
|
src/display/utils.py
CHANGED
@@ -72,7 +72,7 @@ auto_eval_column_dict.append(["mmlu_cs", ColumnContent, ColumnContent("MMLU", "n
|
|
72 |
auto_eval_column_dict.append(["sqad_cs", ColumnContent, ColumnContent("SQAD", "number", True)])
|
73 |
auto_eval_column_dict.append(["subjectivity_cs", ColumnContent, ColumnContent("Subjectivity", "number", True)])
|
74 |
auto_eval_column_dict.append(["truthfulqa_cs", ColumnContent, ColumnContent("TruthfulQA", "number", True)])
|
75 |
-
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("
|
76 |
|
77 |
|
78 |
|
@@ -100,7 +100,7 @@ HEADER_MAP = {
|
|
100 |
"sqad_cs": "SQAD",
|
101 |
"subjectivity_cs": "Subjectivity",
|
102 |
"truthfulqa_cs": "TruthfulQA",
|
103 |
-
"dummy": "
|
104 |
"aggregate_score": "Aggregate Score",
|
105 |
}
|
106 |
|
|
|
72 |
auto_eval_column_dict.append(["sqad_cs", ColumnContent, ColumnContent("SQAD", "number", True)])
|
73 |
auto_eval_column_dict.append(["subjectivity_cs", ColumnContent, ColumnContent("Subjectivity", "number", True)])
|
74 |
auto_eval_column_dict.append(["truthfulqa_cs", ColumnContent, ColumnContent("TruthfulQA", "number", True)])
|
75 |
+
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent(" ", "str", True, dummy=True)]) # The dataframe does not display the last column - BUG in gradio?
|
76 |
|
77 |
|
78 |
|
|
|
100 |
"sqad_cs": "SQAD",
|
101 |
"subjectivity_cs": "Subjectivity",
|
102 |
"truthfulqa_cs": "TruthfulQA",
|
103 |
+
"dummy": " ",
|
104 |
"aggregate_score": "Aggregate Score",
|
105 |
}
|
106 |
|
src/populate.py
CHANGED
@@ -22,7 +22,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
22 |
df["Math (Avg.)"] = df[["GSM8K", "Klokanek"]].mean(axis=1)
|
23 |
df["Classification (Avg.)"] = df[["Czech News", "Facebook Comments", "Mall Reviews", "Subjectivity"]].mean(axis=1)
|
24 |
df["Aggregate Score"] = df[["Grammar (Avg.)", "Knowledge (Avg.)", "Reasoning (Avg.)", "Math (Avg.)", "Classification (Avg.)"]].mean(axis=1)
|
25 |
-
df["
|
26 |
df = df[cols].round(decimals=2)
|
27 |
df.replace(r'\s+', np.nan, regex=True)
|
28 |
# filter out if any of the benchmarks have not been produced
|
|
|
22 |
df["Math (Avg.)"] = df[["GSM8K", "Klokanek"]].mean(axis=1)
|
23 |
df["Classification (Avg.)"] = df[["Czech News", "Facebook Comments", "Mall Reviews", "Subjectivity"]].mean(axis=1)
|
24 |
df["Aggregate Score"] = df[["Grammar (Avg.)", "Knowledge (Avg.)", "Reasoning (Avg.)", "Math (Avg.)", "Classification (Avg.)"]].mean(axis=1)
|
25 |
+
df[" "] = "" # The dataframe does not display the last column - BUG in gradio?
|
26 |
df = df[cols].round(decimals=2)
|
27 |
df.replace(r'\s+', np.nan, regex=True)
|
28 |
# filter out if any of the benchmarks have not been produced
|