MINGYISU committed on
Commit
0c9e3fb
·
1 Parent(s): 498bdf4

init commit

Browse files
Files changed (9) hide show
  1. .gitignore +13 -0
  2. .pre-commit-config.yaml +53 -0
  3. app.py +102 -196
  4. index.html +0 -19
  5. results.csv +15 -0
  6. src/about.py +1 -1
  7. src/display/css_html_js.py +1 -0
  8. style.css +0 -28
  9. utils.py +183 -0
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_evals/
2
+ venv/
3
+ __pycache__/
4
+ .env
5
+ .ipynb_checkpoints
6
+ *ipynb
7
+ .vscode/
8
+
9
+ eval-queue/
10
+ eval-results/
11
+ eval-queue-bk/
12
+ eval-results-bk/
13
+ logs/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ default_language_version:
16
+ python: python3
17
+
18
+ ci:
19
+ autofix_prs: true
20
+ autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
21
+ autoupdate_schedule: quarterly
22
+
23
+ repos:
24
+ - repo: https://github.com/pre-commit/pre-commit-hooks
25
+ rev: v4.3.0
26
+ hooks:
27
+ - id: check-yaml
28
+ - id: check-case-conflict
29
+ - id: detect-private-key
30
+ - id: check-added-large-files
31
+ args: ['--maxkb=1000']
32
+ - id: requirements-txt-fixer
33
+ - id: end-of-file-fixer
34
+ - id: trailing-whitespace
35
+
36
+ - repo: https://github.com/PyCQA/isort
37
+ rev: 5.12.0
38
+ hooks:
39
+ - id: isort
40
+ name: Format imports
41
+
42
+ - repo: https://github.com/psf/black
43
+ rev: 22.12.0
44
+ hooks:
45
+ - id: black
46
+ name: Format code
47
+ additional_dependencies: ['click==8.0.2']
48
+
49
+ - repo: https://github.com/charliermarsh/ruff-pre-commit
50
+ # Ruff version.
51
+ rev: 'v0.0.267'
52
+ hooks:
53
+ - id: ruff
app.py CHANGED
@@ -1,204 +1,110 @@
1
- import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
- import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
  with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
  )
 
158
 
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
 
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  )
190
-
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  )
 
 
 
 
 
 
 
 
 
 
200
 
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
+ from utils import *
2
+
3
+ global data_component
4
+
5
def update_table(query, min_size, max_size, selected_subjects=None):
    """Rebuild the leaderboard table for the current search and filter controls.

    Args:
        query: Text from the search bar; forwarded to ``search_and_filter_models``.
        min_size: Lower bound of the model-size filter.
        max_size: Upper bound of the model-size filter.
        selected_subjects: Subject columns chosen in the checkbox group. When
            ``None`` or empty, every leaderboard column is kept.

    Returns:
        The filtered DataFrame restricted to the columns to display.
    """
    # get_df() re-reads the results CSV on every call.
    filtered_df = search_and_filter_models(get_df(), query, min_size, max_size)
    # Column selection is delegated to the helper in utils so the base-column
    # list lives in one place; the helper also ignores subjects that are not
    # columns of the frame instead of raising KeyError.
    return filter_columns_by_subjects(filtered_df, selected_subjects)
13
+
14
# Build the Gradio UI: an introduction followed by three tabs
# (leaderboard table, about page, submission instructions).
with gr.Blocks() as block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: leaderboard table with search, size and subject filters.
        # The label previously read "MMLU-Pro", a leftover from the template
        # this app was adapted from; the leaderboard shown here is MMEB.
        with gr.TabItem("📊 MMEB", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )
            gr.Markdown(TABLE_INTRODUCTION)

            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder="Search models...",
                    show_label=False,
                    elem_id="search-bar"
                )

            # Loaded once at build time to seed the table and slider bounds.
            df = get_df()
            min_size, max_size = get_size_range(df)

            with gr.Row():
                min_size_slider = gr.Slider(
                    minimum=min_size,
                    maximum=max_size,
                    value=min_size,
                    step=0.1,
                    label="Minimum number of parameters (B)",
                )
                max_size_slider = gr.Slider(
                    minimum=min_size,
                    maximum=max_size,
                    value=max_size,
                    step=0.1,
                    label="Maximum number of parameters (B)",
                )

            # Everything that is not a fixed/base column is offered as a
            # selectable subject.
            subject_choices = [col for col in COLUMN_NAMES if col not in ['Models', 'Model Size(B)', 'Data Source', 'Overall', 'IND', 'OOD']]
            with gr.Row():
                subjects_select = gr.CheckboxGroup(
                    choices=subject_choices,
                    value=subject_choices,
                    label="Select Subjects to Display",
                    elem_id="subjects-select"
                )

            data_component = gr.components.Dataframe(
                value=df[COLUMN_NAMES],
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITLE_TYPE,
                interactive=False,
                visible=True,
            )

            refresh_button = gr.Button("Refresh")

            def update_with_subjects(*args):
                """Forward the current control values to ``update_table``."""
                return update_table(*args)

            # Every filter control triggers the same refresh with the same
            # inputs, so register the handler in one loop instead of four
            # copy-pasted calls. Registration order is unchanged.
            filter_controls = [search_bar, min_size_slider, max_size_slider, subjects_select]
            for control in filter_controls:
                control.change(
                    fn=update_with_subjects,
                    inputs=filter_controls,
                    outputs=data_component
                )
            refresh_button.click(fn=refresh_data, outputs=data_component)

        # Tab 2: static information about the benchmark.
        with gr.TabItem("📝 About", elem_id="qa-tab-table2", id=2):
            gr.Markdown(LEADERBOARD_INFO, elem_classes="markdown-text")

        # Tab 3: submission instructions.
        with gr.TabItem("🚀 Submit here! ", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

block.launch(share=True)
 
 
 
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
results.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Models,Model Size(B),Data Source,Overall,IND,OOD,Classification,VQA,Retrieval,Grounding
2
+ CLIP,unk,unk,37.8,37.1,38.7,42.8,9.1,53.0,51.8
3
+ BLIP2,unk,unk,25.2,25.3,25.1,27.0,4.2,33.9,47.0
4
+ SigLIP,unk,unk,34.8,32.3,38.0,40.3,8.4,31.6,59.5
5
+ OpenCLIP,unk,unk,39.7,39.3,40.2,47.8,10.9,52.3,53.3
6
+ UniIR (BLIP_FF),unk,unk,42.8,44.7,40.4,42.1,15.0,60.1,62.2
7
+ UniIR (CLIP_SF),unk,unk,44.7,47.1,41.7,44.3,16.2,61.8,65.3
8
+ E5-V,unk,unk,13.3,14.9,11.5,21.8,4.9,11.5,19.0
9
+ Magiclens,unk,unk,27.8,31.0,23.7,38.8,8.3,35.4,26.0
10
+ CLIP-FFT,unk,TIGER-Lab,45.4,47.6,42.8,55.2,19.7,53.2,62.2
11
+ OpenCLIP-FFT,unk,unk,47.2,50.5,43.1,56.0,21.9,55.4,64.1
12
+ VLM2Vec (Phi-3.5-V-FFT),unk,TIGER-Lab,55.9,62.8,47.4,52.8,50.3,57.8,72.3
13
+ VLM2Vec (Phi-3.5-V-LoRA),unk,TIGER-Lab,60.1,66.5,52.0,54.8,54.9,62.3,79.5
14
+ VLM2Vec (LLaVA-1.6-LoRA-LowRes),unk,TIGER-Lab,55.0,61.0,47.5,54.7,50.3,56.2,64.0
15
+ VLM2Vec (LLaVA-1.6-LoRA-HighRes),unk,TIGER-Lab,62.9,67.5,57.1,61.2,49.9,67.4,86.1
src/about.py CHANGED
@@ -21,7 +21,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
21
 
22
 
23
  # Your leaderboard name
24
- TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
25
 
26
  # What does your leaderboard evaluate?
27
  INTRODUCTION_TEXT = """
 
21
 
22
 
23
  # Your leaderboard name
24
+ TITLE = """<h1 align="center" id="space-title">MMEB Leaderboard</h1>"""
25
 
26
  # What does your leaderboard evaluate?
27
  INTRODUCTION_TEXT = """
src/display/css_html_js.py CHANGED
@@ -48,6 +48,7 @@ custom_css = """
48
 
49
  .tab-buttons button {
50
  font-size: 20px;
 
51
  }
52
 
53
  #scale-logo {
 
48
 
49
  .tab-buttons button {
50
  font-size: 20px;
51
+ height: 1500px;
52
  }
53
 
54
  #scale-logo {
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import csv
4
+ import json
5
+ import os
6
+ import shutil
7
+ from huggingface_hub import Repository
8
+
9
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Per-task score columns of the leaderboard.
SUBJECTS = ["Classification", "VQA", "Retrieval", "Grounding"]

# All leaderboard columns, in display order.
MODEL_INFO = [
    "Models", "Model Size(B)", "Data Source",
    "Overall", "IND", "OOD",
    "Classification", "VQA", "Retrieval", "Grounding"
]

# Gradio datatype for each MODEL_INFO column, in the same order.
DATA_TITLE_TYPE = ['markdown', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

# TODO: submission process not implemented yet
SUBMISSION_NAME = ""
SUBMISSION_URL = ""
CSV_DIR = "results.csv"  # TODO: Temporary file, to be updated with the actual file

COLUMN_NAMES = MODEL_INFO

LEADERBOARD_INTRODUCTION = """# MMEB Leaderboard

## Introduction
We introduce MMEB, a benchmark for multimodal evaluation of models. The benchmark consists of four tasks: Classification, VQA, Retrieval, and Grounding. Models are evaluated based on 36 datasets.


"""

TABLE_INTRODUCTION = """"""

LEADERBOARD_INFO = """
## Dataset Summary
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = """"""

# The "category" value in the example below was missing its closing quote,
# which made the displayed JSON sample malformed.
SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction

## ⚠ Please note that you need to submit the JSON file with the following format:
```json
[
    {
        "question_id": 123,
        "question": "abc",
        "options": ["abc", "xyz", ...],
        "answer": "ABC",
        "answer_index": 1,
        "category": "abc",
        "pred": "B",
        "model_outputs": ""
    }, ...
]
```
...
"""
64
+
65
def get_df():
    """Load the results CSV as a DataFrame ordered by descending 'Overall'.

    The 'Model Size(B)' column is normalised through ``process_model_size``
    before sorting.

    Returns:
        The sorted DataFrame.
    """
    # TODO: Update this after the hf dataset has been created!
    # repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
    # repo.git_pull()
    results = pd.read_csv(CSV_DIR)
    normalized_sizes = results['Model Size(B)'].apply(process_model_size)
    results['Model Size(B)'] = normalized_sizes
    return results.sort_values(by=['Overall'], ascending=False)
73
+
74
+
75
def add_new_eval(
    input_file,
):
    """Append one submitted result to the results CSV and push it to the hub.

    Args:
        input_file: JSON text of the submission, or ``None``. The code reads
            the keys ``"Model"``, ``"Overall"`` and one key per entry of
            ``SUBJECTS``.

    Returns:
        A short status message: an error for a missing file, a notice when the
        model is already listed, or a success message.
    """
    if input_file is None:
        return "Error! Empty file!"

    upload_data = json.loads(input_file)
    print("upload_data:\n", upload_data)
    # NOTE(review): this row holds only the model name, the overall score and
    # the per-subject scores, while the CSV header has more columns (see
    # MODEL_INFO) -- confirm the intended layout before enabling submissions.
    data_row = [f'{upload_data["Model"]}', upload_data['Overall']]
    data_row += [upload_data[subject] for subject in SUBJECTS]
    print("data_row:\n", data_row)
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()

    with open(CSV_DIR, mode='r', newline='') as file:
        # csv.reader yields [] for blank lines; skip them so row[0] cannot
        # raise IndexError.
        already_submitted = {row[0] for row in csv.reader(file, delimiter=',') if row}

    if data_row[0] in already_submitted:
        print('The entry already exists')
        return 'The entry already exists'

    with open(CSV_DIR, mode='a', newline='') as file:
        csv.writer(file).writerow(data_row)

    submission_repo.push_to_hub()
    print('Submission Successful')
    return 'Submission Successful'
106
+
107
def refresh_data():
    """Reload the results and return only the leaderboard columns."""
    return get_df()[COLUMN_NAMES]
110
+
111
+
112
def search_and_filter_models(df, query, min_size, max_size):
    """Filter the leaderboard by model-name substring and model size.

    Args:
        df: DataFrame with at least the 'Models' and 'Model Size(B)' columns
            plus every column listed in ``COLUMN_NAMES``.
        query: Case-insensitive substring to look for in 'Models'; falsy values
            disable the name filter.
        min_size: Inclusive lower bound on the model size.
        max_size: Inclusive upper bound on the model size.

    Returns:
        A new DataFrame with the matching rows and the ``COLUMN_NAMES`` columns.
    """
    filtered_df = df.copy()

    if query:
        # regex=False: the query is user-typed text, and model names contain
        # characters such as "(" that would otherwise be parsed as (possibly
        # invalid) regular-expression syntax.
        name_matches = filtered_df['Models'].str.contains(
            query, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[name_matches]

    def in_size_range(size):
        # Unknown sizes are ranked as 1000.0, the same stand-in value that
        # get_size_range uses when computing the slider bounds.
        effective = 1000.0 if size == 'unknown' else size
        return min_size <= effective <= max_size

    # astype(bool): when no rows are left, apply() can return a non-boolean
    # (e.g. object-dtype) empty Series, which DataFrame indexing may not treat
    # as a row mask.
    size_mask = filtered_df['Model Size(B)'].apply(in_size_range).astype(bool)
    filtered_df = filtered_df[size_mask]

    return filtered_df[COLUMN_NAMES]
125
+
126
+
127
+ # def search_and_filter_models(df, query, min_size, max_size):
128
+ # filtered_df = df.copy()
129
+
130
+ # if query:
131
+ # filtered_df = filtered_df[filtered_df['Models'].str.contains(query, case=False, na=False)]
132
+
133
+ # def size_filter(x):
134
+ # if isinstance(x, (int, float)):
135
+ # return min_size <= x <= max_size
136
+ # return True
137
+
138
+ # filtered_df = filtered_df[filtered_df['Model Size(B)'].apply(size_filter)]
139
+
140
+ # return filtered_df[COLUMN_NAMES]
141
+
142
+
143
def search_models(df, query):
    """Return the rows whose 'Models' value contains ``query``.

    Args:
        df: DataFrame with a 'Models' column.
        query: Case-insensitive substring to search for. A falsy value
            (``None`` or ``""``) returns ``df`` unchanged.

    Returns:
        The matching rows, or ``df`` itself when no query is given.
    """
    if not query:
        return df
    # regex=False treats the query as literal text, so input like "(BLIP" is
    # matched verbatim instead of raising a regex compilation error.
    matches = df['Models'].str.contains(query, case=False, na=False, regex=False)
    return df[matches]
147
+
148
+
149
+ # def get_size_range(df):
150
+ # numeric_sizes = df[df['Model Size(B)'].apply(lambda x: isinstance(x, (int, float)))]['Model Size(B)']
151
+ # if len(numeric_sizes) > 0:
152
+ # return float(numeric_sizes.min()), float(numeric_sizes.max())
153
+ # return 0, 1000
154
+
155
+
156
def get_size_range(df):
    """Return the (minimum, maximum) model size found in ``df``.

    Sizes equal to the string 'unknown' are counted as 1000.0.

    Args:
        df: DataFrame with a 'Model Size(B)' column.

    Returns:
        A ``(min, max)`` tuple of floats; ``(0.0, 1000.0)`` when ``df`` has no
        rows, so callers never receive NaN bounds.
    """
    column = df['Model Size(B)']
    if column.empty:
        return 0.0, 1000.0
    sizes = column.apply(lambda x: 1000.0 if x == 'unknown' else x)
    return float(sizes.min()), float(sizes.max())
159
+
160
+
161
def process_model_size(size):
    """Normalise one raw 'Model Size(B)' cell.

    Args:
        size: Raw cell value (number, numeric string, 'unk', or missing).

    Returns:
        The size as a float, or the string 'unknown' when the value is
        missing, is 'unk', cannot be parsed, or is not a finite number.
    """
    import math  # local import: math is not among this file's top-level imports

    if pd.isna(size) or size == 'unk':
        return 'unknown'
    try:
        val = float(size)
    except (ValueError, TypeError):
        return 'unknown'
    # float() also parses strings such as "nan" and "inf"; such values would
    # break the min/max and range comparisons done on this column.
    return val if math.isfinite(val) else 'unknown'
169
+
170
+
171
def filter_columns_by_subjects(df, selected_subjects=None):
    """Restrict ``df`` to the base columns plus the selected subject columns.

    Args:
        df: Leaderboard DataFrame.
        selected_subjects: Subject column names to keep (list or tuple). When
            ``None`` or empty, all ``COLUMN_NAMES`` columns are returned.

    Returns:
        ``df`` limited to the requested columns. Requested names that are not
        columns of ``df`` are ignored, and each column appears at most once.
    """
    if selected_subjects is None or len(selected_subjects) == 0:
        return df[COLUMN_NAMES]

    base_columns = ['Models', 'Model Size(B)', 'Data Source', 'Overall']
    # list(...) accepts tuples as well as lists; dict.fromkeys drops repeated
    # names while keeping first-seen order, so a subject that is also a base
    # column does not produce a duplicated column.
    selected_columns = dict.fromkeys(base_columns + list(selected_subjects))

    available_columns = [col for col in selected_columns if col in df.columns]
    return df[available_columns]
180
+
181
def get_subject_choices():
    """Return the selectable subject names as a new list.

    A copy is returned so callers cannot accidentally mutate the module-level
    ``SUBJECTS`` list.
    """
    return list(SUBJECTS)
183
+