Update app.py
Browse files
app.py
CHANGED
@@ -62,29 +62,23 @@ scenarios = df['Scenario'].unique().tolist()
|
|
62 |
demo = gr.Blocks()
|
63 |
|
64 |
with demo:
|
|
|
65 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
66 |
gr.Markdown(
|
67 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
68 |
"[Papers](https://huggingface.co/onekq) " +
|
69 |
-
"[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) "
|
70 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
71 |
-
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
|
72 |
-
|
73 |
-
# Initialize leaderboard with the complete DataFrame
|
74 |
-
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
75 |
-
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
|
76 |
-
}, index=['pass@1'])).reset_index()
|
77 |
-
|
78 |
-
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
79 |
-
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
|
80 |
-
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
81 |
-
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
82 |
-
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
83 |
|
|
|
84 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
85 |
-
duo_leaderboard
|
|
|
|
|
86 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
87 |
-
leaderboard
|
88 |
|
89 |
# Launch the Gradio interface
|
90 |
demo.launch()
|
|
|
62 |
demo = gr.Blocks()
|
63 |
|
64 |
with demo:
|
65 |
+
# Markdown for the leaderboard header and external links
|
66 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
67 |
gr.Markdown(
|
68 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
69 |
"[Papers](https://huggingface.co/onekq) " +
|
70 |
+
"[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) " +
|
71 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
72 |
+
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
|
73 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
# WebApp1K-Duo leaderboard display
|
76 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
77 |
+
duo_leaderboard.render()
|
78 |
+
|
79 |
+
# WebApp1K main leaderboard display
|
80 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
81 |
+
leaderboard.render()
|
82 |
|
83 |
# Launch the Gradio interface
|
84 |
demo.launch()
|