onekq committed on
Commit
bfddf6d
•
1 Parent(s): 34ff948

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -61,7 +61,6 @@ scenarios = df['Scenario'].unique().tolist()
61
 
62
  demo = gr.Blocks()
63
 
64
-
65
  with demo:
66
  # Markdown for the leaderboard header and external links
67
  gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
@@ -73,6 +72,17 @@ with demo:
73
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
74
  )
75
 
 
 
 
 
 
 
 
 
 
 
 
76
  # WebApp1K-Duo leaderboard display
77
  gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
78
  duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
 
61
 
62
  demo = gr.Blocks()
63
 
 
64
  with demo:
65
  # Markdown for the leaderboard header and external links
66
  gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
 
72
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)"
73
  )
74
 
75
+ # Initialize leaderboard with the complete DataFrame
76
+ duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
77
+ 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
78
+ }, index=['pass@1'])).reset_index()
79
+
80
+ complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
81
+ 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
82
+ 'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
83
+ 'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
84
+ }, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
85
+
86
  # WebApp1K-Duo leaderboard display
87
  gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
88
  duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)