Update app.py
Browse files
app.py
CHANGED
@@ -61,7 +61,6 @@ scenarios = df['Scenario'].unique().tolist()
|
|
61 |
|
62 |
demo = gr.Blocks()
|
63 |
|
64 |
-
|
65 |
with demo:
|
66 |
# Markdown for the leaderboard header and external links
|
67 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
@@ -73,6 +72,17 @@ with demo:
|
|
73 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
|
74 |
)
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
# WebApp1K-Duo leaderboard display
|
77 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
78 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|
|
|
61 |
|
62 |
demo = gr.Blocks()
|
63 |
|
|
|
64 |
with demo:
|
65 |
# Markdown for the leaderboard header and external links
|
66 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
|
|
72 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
|
73 |
)
|
74 |
|
75 |
+
# Initialize leaderboard with the complete DataFrame
|
76 |
+
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
77 |
+
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
|
78 |
+
}, index=['pass@1'])).reset_index()
|
79 |
+
|
80 |
+
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
81 |
+
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
|
82 |
+
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
83 |
+
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
84 |
+
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
85 |
+
|
86 |
# WebApp1K-Duo leaderboard display
|
87 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
88 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|