Files changed (3) hide show
  1. README.md +1 -0
  2. app.py +14 -16
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: 🥇
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: true
9
  license: mit
 
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.41.0
8
  app_file: app.py
9
  pinned: true
10
  license: mit
app.py CHANGED
@@ -56,12 +56,21 @@ def init_leaderboard(dataframe, default_selection=["Model", "pass@1", "pass@5",
56
  )
57
 
58
  # Gradio interface
59
- models = df['Model'].unique().tolist()
60
- scenarios = df['Scenario'].unique().tolist()
61
 
62
- demo = gr.Blocks()
 
 
 
63
 
64
- with demo:
 
 
 
 
 
 
65
  gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
66
  gr.Markdown(
67
  "## [Discord](https://discord.gg/3qpAbWC7) " +
@@ -70,21 +79,10 @@ with demo:
70
  "[Github](https://github.com/onekq/WebApp1k) " +
71
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
72
 
73
- # Initialize leaderboard with the complete DataFrame
74
- duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
75
- 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
76
- }, index=['pass@1'])).reset_index()
77
-
78
- complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
79
- 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
80
- 'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
81
- 'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
82
- }, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
83
-
84
  gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
85
  duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
86
  gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
87
- #leaderboard = init_leaderboard(complete_pass_at_k, height=800)
88
 
89
  # Launch the Gradio interface
90
  demo.launch()
 
56
  )
57
 
58
  # Gradio interface
59
+ #models = df['Model'].unique().tolist()
60
+ #scenarios = df['Scenario'].unique().tolist()
61
 
62
+ # Initialize leaderboard with the complete DataFrame
63
+ duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
64
+ 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
65
+ }, index=['pass@1'])).reset_index()
66
 
67
+ complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
68
+ 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
69
+ 'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
70
+ 'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
71
+ }, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
72
+
73
+ with gr.Blocks() as demo:
74
  gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
75
  gr.Markdown(
76
  "## [Discord](https://discord.gg/3qpAbWC7) " +
 
79
  "[Github](https://github.com/onekq/WebApp1k) " +
80
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
81
 
 
 
 
 
 
 
 
 
 
 
 
82
  gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
83
  duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
84
  gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
85
+ leaderboard = init_leaderboard(complete_pass_at_k, default_selection = [], height=800)
86
 
87
  # Launch the Gradio interface
88
  demo.launch()
requirements.txt CHANGED
@@ -3,7 +3,7 @@ black
3
  datasets
4
  gradio
5
  gradio[oauth]
6
- gradio_leaderboard==0.0.9
7
  gradio_client
8
  huggingface-hub>=0.18.0
9
  matplotlib
 
3
  datasets
4
  gradio
5
  gradio[oauth]
6
+ gradio_leaderboard>=0.0.9
7
  gradio_client
8
  huggingface-hub>=0.18.0
9
  matplotlib