feat: add the ranking only tab for qa
app.py CHANGED
```diff
@@ -11,7 +11,7 @@ from src.about import (
 from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
     DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
 from src.display.css_html_js import custom_css
-from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL
+from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
 from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
 from src.read_evals import get_raw_eval_results, get_leaderboard_df
 from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank
@@ -23,14 +23,14 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
-try:
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
-        token=TOKEN
-    )
-except Exception as e:
-    print(f'failed to download')
-    restart_space()
+# try:
+#     snapshot_download(
+#         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+#         token=TOKEN
+#     )
+# except Exception as e:
+#     print(f'failed to download')
+#     restart_space()
 
 raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
 
@@ -110,7 +110,7 @@ with demo:
             show_revision_and_timestamp = get_revision_and_ts_checkbox()
 
             with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
-                with gr.TabItem("
+                with gr.TabItem("Retrieval + Reranking", id=10):
                     with gr.Row():
                         # search retrieval models
                         with gr.Column():
@@ -149,17 +149,17 @@ with demo:
                         leaderboard_table,
                         queue=True
                     )
-                with gr.TabItem("
+                with gr.TabItem("Retrieval Only", id=11):
                     with gr.Column():
                         search_bar_retriever = get_search_bar()
                         selected_noreranker = get_noreranking_dropdown()
                         lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
                         lb_df_retriever = reset_rank(lb_df_retriever)
-                        hidden_lb_db_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
-                        hidden_lb_db_retriever = reset_rank(hidden_lb_db_retriever)
                         lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
                         # Dummy leaderboard for handling the case when the user uses backspace key
-                        hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_db_retriever, types_qa, visible=False)
+                        hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
+                        hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
+                        hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)
 
                         set_listeners(
                             "qa",
@@ -188,7 +188,47 @@ with demo:
                             lb_table_retriever,
                             queue=True
                         )
+                with gr.TabItem("Reranking Only", id=12):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            selected_rerankings_reranker = get_reranking_dropdown(reranking_models)
+                        with gr.Column(scale=1):
+                            search_bar_reranker = gr.Textbox(show_label=False, visible=False)
+                    lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    lb_df_reranker = reset_rank(lb_df_reranker)
+                    lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
+                    hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
+                    hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
+                    hidden_lb_table_reranker = get_leaderboard_table(
+                        hidden_lb_df_reranker, types_qa, visible=False
+                    )
 
+                    set_listeners(
+                        "qa",
+                        lb_table_reranker,
+                        hidden_lb_table_reranker,
+                        search_bar_reranker,
+                        selected_domains,
+                        selected_langs,
+                        selected_rerankings_reranker,
+                        show_anonymous,
+                        show_revision_and_timestamp,
+                    )
+                    # set metric listener
+                    selected_metric.change(
+                        update_metric_qa,
+                        [
+                            selected_metric,
+                            selected_domains,
+                            selected_langs,
+                            selected_rerankings_reranker,
+                            search_bar_reranker,
+                            show_anonymous,
+                            show_revision_and_timestamp,
+                        ],
+                        lb_table_reranker,
+                        queue=True
+                    )
         with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
             with gr.Row():
                 with gr.Column(min_width=320):
```
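The `# Dummy leaderboard ...` comment in the diff refers to a pattern used for every table in this change: the visible leaderboard is paired with an invisible copy built from the unfiltered frame (`original_df_qa`), and the search listener filters that hidden copy rather than the visible one, so backspacing down to an empty query restores the full list. A minimal, self-contained sketch of the pattern; component and column names here are illustrative, not this repo's helpers:

```python
import gradio as gr
import pandas as pd

full_df = pd.DataFrame({
    "Retrieval Model": ["bge-m3", "e5-mistral", "bm25"],
    "Average": [55.3, 54.1, 47.9],
})

def filter_table(query: str, full_table: pd.DataFrame) -> pd.DataFrame:
    # Always filter the hidden, unfiltered copy: deleting the query
    # with backspace then returns the complete table, not a subset.
    if not query:
        return full_table
    mask = full_table["Retrieval Model"].str.contains(query, case=False, regex=False)
    return full_table[mask]

with gr.Blocks() as demo:
    search_bar = gr.Textbox(show_label=False, placeholder="Search models")
    visible_table = gr.Dataframe(value=full_df)                # what the user sees
    hidden_table = gr.Dataframe(value=full_df, visible=False)  # untouched source of truth
    search_bar.change(filter_table, [search_bar, hidden_table], visible_table)

demo.launch()
```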
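`reset_rank` comes from `src.utils` and is called after every filter in this change, so ranks are renumbered within each sub-tab's view instead of keeping their global positions: the best `NoReranker` row shows rank 1 under "Retrieval Only" even if reranked pipelines beat it overall. Its body is not part of this diff; a plausible sketch under that assumption, with an illustrative column name:

```python
import pandas as pd

def reset_rank(df: pd.DataFrame, rank_col: str = "Rank") -> pd.DataFrame:
    # Renumber rows 1..N, assuming df is already sorted by the active metric.
    df = df.copy()
    df[rank_col] = range(1, len(df) + 1)
    return df
```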
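`update_metric_qa` is defined earlier in app.py, outside this diff's hunks. The `selected_metric.change(...)` wiring passes seven input components, so it is presumably a QA-specific wrapper that binds `raw_data` and the task name before delegating to the imported `update_metric`. A sketch of that assumed shape, relying only on names visible in the diff (`update_metric` from src.utils, `raw_data` defined above):

```python
# Assumed wrapper; the real definition lives earlier in app.py.
def update_metric_qa(metric, domains, langs, reranking_model, query,
                     show_anonymous, show_revision_and_timestamp):
    # raw_data and "qa" are bound here so the Gradio callback receives
    # only the values coming from the seven input components.
    return update_metric(raw_data, "qa", metric, domains, langs, reranking_model,
                         query, show_anonymous, show_revision_and_timestamp)
```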