Commit 61c2746 by pminervini: update
Parent(s): 9af5ebf
Files changed:
- app.py (+1, -5)
- src/display/utils.py (+13, -0)
- src/tools/plots.py (+17, -1)
app.py CHANGED
@@ -30,11 +30,7 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 # from src.submission.check_validity import already_submitted_models
 # from src.tools.collections import update_collections
-from src.tools.plots import (
-    create_metric_plot_obj,
-    create_plot_df,
-    create_scores_df,
-)
+# from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
 
 
 def restart_space():
src/display/utils.py CHANGED
@@ -62,6 +62,7 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
@@ -72,6 +73,18 @@ class EvalQueueColumn:  # Queue column
     status = ColumnContent("status", "str", True)
 
 
+# Define the human baselines
+human_baseline_row = {
+    AutoEvalColumn.model.name: "<p>Human performance</p>",
+    AutoEvalColumn.revision.name: "N/A",
+    AutoEvalColumn.precision.name: None,
+    AutoEvalColumn.average.name: 100.0,
+    AutoEvalColumn.nqopen.name: 100.0,
+    AutoEvalColumn.triviaqa.name: 100.0,
+    AutoEvalColumn.dummy.name: "human_baseline",
+    AutoEvalColumn.model_type.name: "",
+}
+
 @dataclass
 class ModelDetails:
     name: str
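For context, a minimal sketch of how a row dict keyed by display-column names, like human_baseline_row above, can be appended to a leaderboard DataFrame. The column labels below are hypothetical stand-ins, not this repo's actual AutoEvalColumn values:

    import pandas as pd

    # Stand-in for human_baseline_row; keys are assumed display names,
    # not the actual AutoEvalColumn.*.name values from this repo.
    baseline_row = {
        "Model": "<p>Human performance</p>",
        "Average": 100.0,
        "NQ Open": 100.0,
        "TriviaQA": 100.0,
    }

    leaderboard_df = pd.DataFrame(
        [{"Model": "some-model", "Average": 61.2, "NQ Open": 58.4, "TriviaQA": 64.0}]
    )

    # pd.concat appends the baseline as one more row of the leaderboard
    leaderboard_df = pd.concat(
        [leaderboard_df, pd.DataFrame([baseline_row])], ignore_index=True
    )
    print(leaderboard_df)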
src/tools/plots.py CHANGED
@@ -93,7 +93,8 @@ def create_metric_plot_obj(df: pd.DataFrame, metrics: list[str], title: str) ->
     df = df[df["task"].isin(metrics)]
 
     # Filter the human baselines based on the specified metrics
-
+    from src.display.utils import human_baseline_row as HUMAN_BASELINE
+    filtered_human_baselines = {k: v for k, v in HUMAN_BASELINE.items() if k in metrics}
 
     # Create a line figure using plotly express with specified markers and custom data
     fig = px.line(
@@ -128,6 +129,21 @@ def create_metric_plot_obj(df: pd.DataFrame, metrics: list[str], title: str) ->
     for trace in fig.data:
         metric_color_mapping[trace.name] = trace.line.color
 
+    # Iterate over filtered human baselines and add horizontal lines to the figure
+    for metric, value in filtered_human_baselines.items():
+        color = metric_color_mapping.get(metric, "blue")  # Retrieve color from mapping; default to blue if not found
+        location = "top left" if metric == "HellaSwag" else "bottom left"  # Set annotation position
+        # Add horizontal line with matched color and positioned annotation
+        fig.add_hline(
+            y=value,
+            line_dash="dot",
+            annotation_text=f"{metric} human baseline",
+            annotation_position=location,
+            annotation_font_size=10,
+            annotation_font_color=color,
+            line_color=color,
+        )
+
     return fig
 
 
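The add_hline pattern added above can be exercised standalone. Below is a minimal runnable sketch with dummy data; the metric names and scores are made up, while the annotation keyword arguments mirror the ones in the diff. (The function-local import of human_baseline_row in the hunk above presumably sidesteps a circular dependency between src.tools.plots and src.display.utils.)

    import pandas as pd
    import plotly.express as px

    # Dummy score-over-time data for two tasks (values are illustrative only)
    df = pd.DataFrame({
        "date": ["2023-01", "2023-02", "2023-03"] * 2,
        "score": [40.0, 48.5, 55.0, 35.0, 42.0, 50.5],
        "task": ["NQ Open"] * 3 + ["TriviaQA"] * 3,
    })
    fig = px.line(df, x="date", y="score", color="task", markers=True)

    # Match each baseline's color to its task trace, as the commit does
    color_by_task = {trace.name: trace.line.color for trace in fig.data}
    baselines = {"NQ Open": 100.0, "TriviaQA": 100.0}

    for metric, value in baselines.items():
        color = color_by_task.get(metric, "blue")
        fig.add_hline(
            y=value,
            line_dash="dot",
            line_color=color,
            annotation_text=f"{metric} human baseline",
            annotation_position="bottom left",
            annotation_font_size=10,
            annotation_font_color=color,
        )
    fig.show()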