Update app.py
Browse files
app.py
CHANGED
@@ -62,15 +62,13 @@ scenarios = df['Scenario'].unique().tolist()
|
|
62 |
demo = gr.Blocks()
|
63 |
|
64 |
with demo:
|
65 |
-
# Markdown for the leaderboard header and external links
|
66 |
gr.Markdown("# π WebApp1K Models Leaderboard")
|
67 |
gr.Markdown(
|
68 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
69 |
"[Papers](https://huggingface.co/onekq) " +
|
70 |
-
"[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) "
|
71 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
72 |
-
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
|
73 |
-
)
|
74 |
|
75 |
# Initialize leaderboard with the complete DataFrame
|
76 |
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
@@ -82,14 +80,11 @@ with demo:
|
|
82 |
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
83 |
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
84 |
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
85 |
-
|
86 |
-
# WebApp1K-Duo leaderboard display
|
87 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
88 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|
89 |
-
|
90 |
-
# WebApp1K main leaderboard display
|
91 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
92 |
leaderboard = init_leaderboard(complete_pass_at_k, height=800)
|
93 |
|
94 |
# Launch the Gradio interface
|
95 |
-
demo.launch()
|
|
|
62 |
demo = gr.Blocks()
|
63 |
|
64 |
with demo:
|
|
|
65 |
gr.Markdown("# π WebApp1K Models Leaderboard")
|
66 |
gr.Markdown(
|
67 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
68 |
"[Papers](https://huggingface.co/onekq) " +
|
69 |
+
"[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) "
|
70 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
71 |
+
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
|
|
|
72 |
|
73 |
# Initialize leaderboard with the complete DataFrame
|
74 |
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
|
|
80 |
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
81 |
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
82 |
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
83 |
+
|
|
|
84 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
85 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|
|
|
|
|
86 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
87 |
leaderboard = init_leaderboard(complete_pass_at_k, height=800)
|
88 |
|
89 |
# Launch the Gradio interface
|
90 |
+
demo.launch()
|