Spaces: AIR-Bench

nan committed · Commit ca1267e · 1 parent: 9d64883

feat: add the ranking only tab for qa

Files changed (1):
  app.py (+54 -14)
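
The commit adds a third QA sub-tab, "Reranking Only" (id=12), which pins the retrieval side to BM25 so rerankers can be compared against a common first-stage run. Along the way it renames the other two sub-tabs ("Retriever + Reranker" → "Retrieval + Reranking", "Retriever Only" → "Retrieval Only"), fixes the misspelled hidden_lb_db_retriever variables to hidden_lb_df_retriever, imports COL_NAME_RETRIEVAL_MODEL, and comments out the snapshot_download of the results dataset. Two short sketches after the diff illustrate the helper patterns the new tab reuses.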
app.py CHANGED
@@ -11,7 +11,7 @@ from src.about import (
 from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
     DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
 from src.display.css_html_js import custom_css
-from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL
+from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
 from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
 from src.read_evals import get_raw_eval_results, get_leaderboard_df
 from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank
@@ -23,14 +23,14 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
-try:
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
-        token=TOKEN
-    )
-except Exception as e:
-    print(f'failed to download')
-    restart_space()
+# try:
+#     snapshot_download(
+#         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+#         token=TOKEN
+#     )
+# except Exception as e:
+#     print(f'failed to download')
+#     restart_space()
 
 raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
 
@@ -110,7 +110,7 @@ with demo:
             show_revision_and_timestamp = get_revision_and_ts_checkbox()
 
             with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
-                with gr.TabItem("Retriever + Reranker", id=10):
+                with gr.TabItem("Retrieval + Reranking", id=10):
                     with gr.Row():
                         # search retrieval models
                         with gr.Column():
@@ -149,17 +149,17 @@ with demo:
                         leaderboard_table,
                         queue=True
                     )
-                with gr.TabItem("Retriever Only", id=11):
+                with gr.TabItem("Retrieval Only", id=11):
                     with gr.Column():
                         search_bar_retriever = get_search_bar()
                         selected_noreranker = get_noreranking_dropdown()
                     lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
                     lb_df_retriever = reset_rank(lb_df_retriever)
-                    hidden_lb_db_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
-                    hidden_lb_db_retriever = reset_rank(hidden_lb_db_retriever)
                     lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
                     # Dummy leaderboard for handling the case when the user uses backspace key
-                    hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_db_retriever, types_qa, visible=False)
+                    hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
+                    hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
+                    hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)
 
                     set_listeners(
                         "qa",
@@ -188,7 +188,47 @@ with demo:
                         lb_table_retriever,
                         queue=True
                     )
+                with gr.TabItem("Reranking Only", id=12):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            selected_rerankings_reranker = get_reranking_dropdown(reranking_models)
+                        with gr.Column(scale=1):
+                            search_bar_reranker = gr.Textbox(show_label=False, visible=False)
+                    lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    lb_df_reranker = reset_rank(lb_df_reranker)
+                    lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
+                    hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
+                    hidden_lb_table_reranker = get_leaderboard_table(
+                        hidden_lb_df_reranker, types_qa, visible=False
+                    )
 
+                    set_listeners(
+                        "qa",
+                        lb_table_reranker,
+                        hidden_lb_table_reranker,
+                        search_bar_reranker,
+                        selected_domains,
+                        selected_langs,
+                        selected_rerankings_reranker,
+                        show_anonymous,
+                        show_revision_and_timestamp,
+                    )
+                    # set metric listener
+                    selected_metric.change(
+                        update_metric_qa,
+                        [
+                            selected_metric,
+                            selected_domains,
+                            selected_langs,
+                            selected_rerankings_reranker,
+                            search_bar_reranker,
+                            show_anonymous,
+                            show_revision_and_timestamp,
+                        ],
+                        lb_table_reranker,
+                        queue=True
+                    )
         with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
             with gr.Row():
                 with gr.Column(min_width=320):
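
Both the "Retrieval Only" and the new "Reranking Only" tabs slice the full QA table and then call reset_rank, because the sliced frame still carries ranks from the full leaderboard. A minimal sketch of what that helper plausibly does (the real one lives in src.utils; the Rank, Reranking Model, and Average column names here are assumptions):

import pandas as pd

def reset_rank(df: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical stand-in for src.utils.reset_rank: renumber the
    # Rank column from 1, keeping the rows' existing (score-sorted) order.
    df = df.reset_index(drop=True)
    df["Rank"] = df.index + 1
    return df

full = pd.DataFrame({
    "Rank": [1, 2, 3, 4],
    "Reranking Model": ["bge-reranker", "NoReranker", "bge-reranker", "NoReranker"],
    "Average": [58.0, 55.2, 54.7, 51.9],
})

# Same shape as the "Retrieval Only" filter in the diff: keep only the
# NoReranker rows, then renumber so they rank 1 and 2 instead of 2 and 4.
retriever_only = reset_rank(full[full["Reranking Model"] == "NoReranker"])

The hidden table ("Dummy leaderboard for handling the case when the user uses backspace key") is a common Gradio trick: the visible table shows filtered rows, while an invisible copy of the unfiltered frame is wired into the search listener, so clearing the query re-filters from the full data rather than from the already-filtered view. A self-contained sketch of the pattern, with toy data standing in for the real leaderboard:

import gradio as gr
import pandas as pd

# Toy stand-in for the QA leaderboard frame.
df = pd.DataFrame({
    "Retrieval Model": ["bge-m3", "e5-mistral-7b", "BM25"],
    "Average": [55.2, 54.1, 40.3],
})

def filter_rows(query, full):
    # Always filter from the full, hidden copy; an empty query
    # (e.g. after backspacing the last character) restores every row.
    if not query:
        return full
    return full[full["Retrieval Model"].str.contains(query, case=False, regex=False)]

with gr.Blocks() as demo:
    search = gr.Textbox(show_label=False, placeholder="Search retrieval models")
    table = gr.Dataframe(value=df)  # what the user sees
    hidden_table = gr.Dataframe(value=df, visible=False)  # unfiltered source of truth
    search.change(filter_rows, [search, hidden_table], table, queue=True)

demo.launch()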