# Hugging Face Space (status: Running)
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions'] | |
import gradio as gr | |
import pandas as pd | |
import re | |
import pandas as pd | |
import numpy as np | |
from collections import defaultdict | |
from constants import * | |
import os | |
from huggingface_hub import Repository | |
import json | |
# NOTE: a ``global`` statement at module level has no effect on name binding;
# it is kept here only to leave the module's statements unchanged.
global data_component, filter_component

# Token read from the TOKEN environment variable (None when it is not set).
TOKEN = os.environ.get("TOKEN")

# Handle on the results dataset; constructed with a local working directory
# and the remote dataset id it is cloned from.
repo = Repository(
    local_dir="./download_from_dataset",
    clone_from="JMMMU/leaderboard_result",
    repo_type="dataset",
    use_auth_token=TOKEN,
)

# Working directory at import time.
current_directory = os.getcwd()
def validate_model_size(s):
    """Normalise a user-supplied model size to ``'<number>B'`` or ``'-'``.

    Accepted inputs are ``'-'`` and a non-negative number followed by an
    upper-case ``B`` (for example ``'7B'`` or ``'1.5B'``). Leading and
    trailing whitespace is ignored. Anything else, including non-string
    input, maps to ``'-'``.

    Args:
        s: Raw text typed into the "Model size" box.

    Returns:
        The trimmed size string when it is well formed, otherwise ``'-'``.
    """
    if not isinstance(s, str):
        return '-'
    candidate = s.strip()
    # fullmatch (rather than match + '$') so that a trailing newline cannot
    # slip through; the optional fraction keeps sizes such as '1.5B', which
    # the leaderboard's size filter already parses with float().
    if re.fullmatch(r'\d+(?:\.\d+)?B|-', candidate):
        return candidate
    return '-'
def upload_file(files):
    """Return the ``name`` attribute of every object in *files*, in order."""
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths
def get_acc(data, subject_list):
    """Average the ``'jmmmu_acc,none'`` value over the given subjects.

    Args:
        data: Parsed result file; ``data["results"][subject]`` must hold a
            ``'jmmmu_acc,none'`` entry for every subject in *subject_list*.
        subject_list: Non-empty list of subject keys to average.

    Returns:
        The mean accuracy as a percentage, rounded to one decimal place.
    """
    per_subject = data["results"]
    total = 0
    # Accumulate left to right with plain addition so the floating-point
    # result does not depend on how a built-in reduction would sum.
    for subject in subject_list:
        total = total + per_subject[subject]['jmmmu_acc,none']
    return round(total / len(subject_list) * 100, 1)
def calculate_score(input_file):
    """Build the leaderboard score dictionary from an uploaded result file.

    Args:
        input_file: UTF-8 encoded bytes of a JSON document whose
            ``"results"`` mapping holds a ``'jmmmu_acc,none'`` value for
            ``jmmmu``, ``culture_agnostic``, ``culture_specific`` and each
            individual ``jmmmu_*`` subject listed below.

    Returns:
        A dict of percentages rounded to one decimal place, keyed by
        ``overall``, ``cultureSpecific``, ``cultureAgnostic`` and one key
        per subject group.
    """
    data = json.loads(input_file.decode('utf-8'))

    def headline(split):
        # Aggregate accuracy reported directly in the file for one split.
        return round(data["results"][split]['jmmmu_acc,none'] * 100, 1)

    overall = headline("jmmmu")
    ca = headline("culture_agnostic")
    cs = headline("culture_specific")

    # Output key -> subjects whose accuracies are averaged for that key.
    subject_groups = {
        "artPsychology": ["jmmmu_design", "jmmmu_music", "jmmmu_psychology"],
        "science": ["jmmmu_biology", "jmmmu_chemistry", "jmmmu_physics", "jmmmu_math"],
        "business": [
            "jmmmu_accounting",
            "jmmmu_economics",
            "jmmmu_finance",
            "jmmmu_manage",
            "jmmmu_marketing",
        ],
        "healthMedicine": [
            "jmmmu_basic_medical_science",
            "jmmmu_clinical_medicine",
            "jmmmu_diagnostics_and_laboratory_medicine",
            "jmmmu_pharmacy",
            "jmmmu_public_health",
        ],
        "techEngineering": [
            "jmmmu_agriculture",
            "jmmmu_architecture_and_engineering",
            "jmmmu_computer_science",
            "jmmmu_electronics",
            "jmmmu_energy_and_power",
            "jmmmu_materials",
            "jmmmu_mechanical_engineering",
        ],
        "japaneseArt": ["jmmmu_japanese_art"],
        "japaneseHeritage": ["jmmmu_japanese_heritage"],
        "japaneseHistory": ["jmmmu_japanese_history"],
        "worldHistory": ["jmmmu_world_history"],
    }
    group_scores = {
        key: get_acc(data, subjects) for key, subjects in subject_groups.items()
    }

    result_dict = {
        "overall": overall,
        "cultureSpecific": cs,
        "cultureAgnostic": ca,
    }
    # Append the group scores in the order the leaderboard row expects.
    for key in (
        "japaneseArt",
        "japaneseHeritage",
        "japaneseHistory",
        "worldHistory",
        "artPsychology",
        "business",
        "science",
        "healthMedicine",
        "techEngineering",
    ):
        result_dict[key] = group_scores[key]
    return result_dict
def _plain_model_name(cell):
    """Return the display name held in a 'Model' cell, without any markdown link."""
    text = str(cell)
    if text.startswith('[') and '](' in text:
        return text[1:text.index('](')]
    # Unlinked names are stored as-is; do not strip their first character.
    return text


def add_new_eval(
    input_file,
    model_type: str,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    model_size: str,
):
    """Score an uploaded result file and record it in the leaderboard CSV.

    The row is appended, or, when *revision_name_textbox* names a model that
    is already listed, written over that model's row. The updated CSV and a
    copy of the uploaded JSON are then committed and pushed through ``repo``.

    Args:
        input_file: Raw bytes of the uploaded JSON result file, or ``None``.
        model_type: Value for the model-type column.
        model_name_textbox: Name used when no revision name is given.
        revision_name_textbox: Name of an existing entry to replace; an empty
            string means "add a new entry".
        model_link: Optional URL; when non-empty the name is stored as a
            markdown link.
        model_size: Free-text size, normalised by ``validate_model_size``.

    Returns:
        A warning string when the submission is rejected, ``0`` on success.

    Raises:
        FileNotFoundError: If the CSV is missing after it has been written.
    """
    if input_file is None:
        warning_text = "Error! Empty file!"
        print(warning_text)
        return warning_text

    model_size = validate_model_size(model_size)
    csv_path = CSV_RESULT_PATH

    # Score first so a malformed upload fails before any repository work.
    result_dict = calculate_score(input_file)

    # Pull before reading and editing the CSV: pulling on top of an already
    # modified working copy can fail or conflict with upstream changes.
    repo.git_pull()
    csv_data = pd.read_csv(csv_path)
    known_names = [_plain_model_name(cell) for cell in csv_data['Model']]

    append_row = csv_data.shape[0]
    if revision_name_textbox == '':
        model_name = model_name_textbox
        row = append_row
    else:
        model_name = revision_name_textbox
        if model_name in known_names:
            row = known_names.index(model_name)  # overwrite the existing entry
        else:
            row = append_row

    model_name_wo_link = model_name
    if model_link != '':
        model_name = f'[{model_name}]({model_link})'

    new_data = [
        model_type,
        model_name,
        model_size,
        result_dict["overall"],
        result_dict["cultureSpecific"],
        result_dict["cultureAgnostic"],
        result_dict["japaneseArt"],
        result_dict["japaneseHeritage"],
        result_dict["japaneseHistory"],
        result_dict["worldHistory"],
        result_dict["artPsychology"],
        result_dict["business"],
        result_dict["science"],
        result_dict["healthMedicine"],
        result_dict["techEngineering"],
    ]

    # If the same data already exists, return an error.
    if new_data in csv_data.values.tolist():
        warning_text = "Error! The same data already exists!"
        print(warning_text)
        return warning_text

    # If the same model name already exists, return an error. Comparing a
    # five-element slice with whole CSV rows could never match, so the check
    # is made on the name itself; a revision name is the way to replace an
    # existing entry.
    if revision_name_textbox == '' and model_name_wo_link in known_names:
        warning_text = "Error! The same data already exists! Please fill revision_name."
        print(warning_text)
        return warning_text

    csv_data.loc[row] = new_data
    csv_data.to_csv(csv_path, index=False)

    absolute_result_path = os.path.abspath(csv_path)
    if not os.path.exists(absolute_result_path):
        raise FileNotFoundError(f"File {absolute_result_path} not found")
    repo.git_add(absolute_result_path)

    # The model name is user input: neutralise path separators so the JSON
    # copy cannot be written outside the queue directory.
    safe_stem = re.sub(r'[\\/]+', '_', model_name_wo_link)
    os.makedirs(CSV_QUEUE_DIR, exist_ok=True)
    save_path = os.path.join(CSV_QUEUE_DIR, f"{safe_stem}.json")
    with open(save_path, "wb") as f:
        f.write(input_file)
    repo.git_add(os.path.abspath(save_path))

    repo.git_commit(f"add {model_name_wo_link} results")
    repo.git_push()
    print(f"Success! Your {model_name_wo_link} has been added!")
    return 0
def get_baseline_df():
    """Load the leaderboard restricted to the currently configured columns.

    Pulls the results repository, reads the CSV at ``CSV_RESULT_PATH``, sorts
    it by ``"Overall"`` in descending order and keeps the ``MODEL_INFO``
    columns followed by the columns in ``checkbox_group.value``.
    """
    repo.git_pull()
    ranked = pd.read_csv(CSV_RESULT_PATH).sort_values(by="Overall", ascending=False)
    return ranked[MODEL_INFO + checkbox_group.value]
def get_all_df():
    """Load the full leaderboard, best ``"Overall"`` score first.

    Pulls the results repository before reading the CSV at
    ``CSV_RESULT_PATH``; every column is kept.
    """
    repo.git_pull()
    leaderboard = pd.read_csv(CSV_RESULT_PATH)
    return leaderboard.sort_values(by="Overall", ascending=False)
# Gradio UI: a leaderboard tab with column / model-size filters, a submission
# tab, a refresh button and a citation box. The app is launched at the end.
block = gr.Blocks()

with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # table jmmmu bench
        # NOTE(review): the leading characters of this tab label look like a
        # mis-decoded emoji; confirm against the deployed app.
        with gr.TabItem("π JMMMU Benchmark", elem_id="jmmmu-benchmark-tab-table", id=1):
            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

            # Initial table contents, built from the default column selection.
            baseline_value = get_baseline_df()
            baseline_header = MODEL_INFO + checkbox_group.value
            baseline_datatype = ['markdown'] * 2 + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_columns):
                """Rebuild the table for the chosen size buckets and columns.

                Args:
                    selected_model_size: Checked entries of the "Model Size"
                        group (the code tests for '-', '<10B' and '>=10B').
                    selected_columns: Checked entries of the
                        "Evaluation Dimension" group; may be empty.

                Returns:
                    A Dataframe component holding the filtered rows.
                """
                updated_data = get_all_df()

                # model_size
                def custom_filter(row, model_size_filters):
                    # str() keeps missing / non-text cells from raising here.
                    size_label = str(row['Model Size']).upper()
                    if size_label == '-':
                        return '-' in model_size_filters
                    if 'B' not in size_label:
                        return False
                    try:
                        size = float(size_label.replace('B', ''))
                    except ValueError:
                        # Unparsable size: hide the row instead of crashing.
                        return False
                    return ('>=10B' in model_size_filters and size >= 10) or (
                        '<10B' in model_size_filters and size < 10
                    )

                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size)
                updated_data = updated_data[mask]

                # columns: keep TASK_INFO order regardless of click order
                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns
                updated_data = updated_data[present_columns]
                # With every column deselected there is nothing to sort by;
                # the rows keep the "Overall" ordering from get_all_df().
                if selected_columns:
                    updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )
                return filter_component

            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)
            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)

        # table 5
        # NOTE(review): this tab label and the heading below also look
        # mis-encoded; confirm against the deployed app.
        with gr.TabItem("π Submit here! ", elem_id="jmmmu-benchmark-tab-table", id=5):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# βοΈβ¨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_type = gr.Dropdown(
                        choices=["LMM", "LLM"],
                        label="Model type",
                        multiselect=False,
                        value="LMM",
                        interactive=True,
                    )
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    # Rebinds ``model_size``: from here on the name refers to
                    # this textbox. The size-filter listeners above were
                    # already attached to the checkbox group.
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-', default is '-')"
                    )

                with gr.Column():
                    input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
                    submit_button = gr.Button("Submit Eval")

            # NOTE(review): this component is created but not passed as an
            # output of the click handler below.
            submission_result = gr.Markdown()
            submit_button.click(
                add_new_eval,
                inputs=[
                    input_file,
                    model_type,
                    model_name_textbox,
                    revision_name_textbox,
                    model_link,
                    model_size,
                ],
            )

    def refresh_data():
        """Reload the leaderboard table via ``get_baseline_df``."""
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()