Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
a30a228
·
1 Parent(s): e84128d

chore: clean up

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +8 -0
  3. utils.py +7 -11
.gitignore CHANGED
@@ -15,3 +15,4 @@ logs/
15
  .idea/
16
  .venv/
17
  toys/
 
 
15
  .idea/
16
  .venv/
17
  toys/
18
+ .DS_Store
app.py CHANGED
@@ -290,6 +290,14 @@ with demo:
290
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
291
  with gr.Row():
292
  gr.Markdown("## ✉️Submit your model here!", elem_classes="markdown-text")
 
 
 
 
 
 
 
 
293
  with gr.Row():
294
  file_output = gr.File()
295
  with gr.Row():
 
290
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
291
  with gr.Row():
292
  gr.Markdown("## ✉️Submit your model here!", elem_classes="markdown-text")
293
+ with gr.Row():
294
+ with gr.Column():
295
+ benchmark_version = gr.Dropdown(
296
+ ['AIR-Bench_24.04',], value=['AIR-Bench_24.04',], interactive=True, label="AIR-Bench Version")
297
+ with gr.Column():
298
+ model_name_textbox = gr.Textbox(label="Model name")
299
+ with gr.Column():
300
+ model_url = gr.Textbox(label="Model URL")
301
  with gr.Row():
302
  file_output = gr.File()
303
  with gr.Row():
utils.py CHANGED
@@ -1,14 +1,10 @@
1
- import pandas as pd
2
- import os
3
-
4
- from src.display.formatting import styled_error, styled_message, styled_warning
5
 
6
- from huggingface_hub import HfApi
7
 
8
- from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, QA_BENCHMARK_COLS, LONG_DOC_BENCHMARK_COLS
9
  from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
 
10
  from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
11
- from typing import List
12
 
13
 
14
  def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
@@ -41,7 +37,7 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
41
  return df[(df[AutoEvalColumnQA.retrieval_model.name].str.contains(query, case=False))]
42
 
43
 
44
- def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str="qa") -> pd.DataFrame:
45
  if task == "qa":
46
  always_here_cols = [
47
  AutoEvalColumnQA.retrieval_model.name,
@@ -111,7 +107,7 @@ def update_metric(
111
  query: str,
112
  ) -> pd.DataFrame:
113
  if task == 'qa':
114
- leaderboard_df = get_leaderboard_df(raw_data, COLS_QA, QA_BENCHMARK_COLS, task=task, metric=metric)
115
  return update_table(
116
  leaderboard_df,
117
  domains,
@@ -120,7 +116,7 @@ def update_metric(
120
  query
121
  )
122
  elif task == 'long_doc':
123
- leaderboard_df = get_leaderboard_df(raw_data, COLS_LONG_DOC, LONG_DOC_BENCHMARK_COLS, task=task, metric=metric)
124
  return update_table_long_doc(
125
  leaderboard_df,
126
  domains,
@@ -138,4 +134,4 @@ def upload_file(files):
138
  # print(file_paths)
139
  # HfApi(token="").upload_file(...)
140
  # os.remove(fp)
141
- return file_paths
 
1
+ from typing import List
 
 
 
2
 
3
+ import pandas as pd
4
 
 
5
  from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
6
+ from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC
7
  from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
 
8
 
9
 
10
  def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
 
37
  return df[(df[AutoEvalColumnQA.retrieval_model.name].str.contains(query, case=False))]
38
 
39
 
40
+ def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str = "qa") -> pd.DataFrame:
41
  if task == "qa":
42
  always_here_cols = [
43
  AutoEvalColumnQA.retrieval_model.name,
 
107
  query: str,
108
  ) -> pd.DataFrame:
109
  if task == 'qa':
110
+ leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
111
  return update_table(
112
  leaderboard_df,
113
  domains,
 
116
  query
117
  )
118
  elif task == 'long_doc':
119
+ leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
120
  return update_table_long_doc(
121
  leaderboard_df,
122
  domains,
 
134
  # print(file_paths)
135
  # HfApi(token="").upload_file(...)
136
  # os.remove(fp)
137
+ return file_paths