# AtsuMiyai
# update
# 86af462
# Names exported by a star-import of this module.
# NOTE(review): only `block` is defined in the visible source. Confirm that
# 'make_clickable_model', 'make_clickable_user' and 'get_submissions' are
# provided elsewhere (e.g. by `from constants import *`); if they are not, a
# star-import of this module fails with AttributeError.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import gradio as gr
import pandas as pd
import re
import pandas as pd
import numpy as np
from collections import defaultdict
from constants import *
import os
from huggingface_hub import Repository
import json
# NOTE(review): a `global` statement at module level has no effect; both names
# are ordinary module-level/local names wherever they are assigned below.
global data_component, filter_component
# Access token read from the TOKEN environment variable (None when unset).
TOKEN = os.environ.get("TOKEN")
# Handle on the "JMMMU/leaderboard_result" dataset repo, using
# ./download_from_dataset as its local directory. Constructed at import time.
repo = Repository(local_dir="./download_from_dataset", clone_from="JMMMU/leaderboard_result", repo_type="dataset", use_auth_token=TOKEN)
# Working directory at import time; not referenced again in the visible source.
current_directory = os.getcwd()
def validate_model_size(s):
    """Normalise a user-supplied model size to ``'<digits>B'`` or ``'-'``.

    Args:
        s: Text typed into the "Model size" box, e.g. ``"7B"`` or ``"-"``.

    Returns:
        The size with surrounding whitespace removed when it is one or more
        digits followed by an upper-case ``B``, or exactly ``'-'``.
        Anything else (including non-string input) yields ``'-'``.
    """
    if not isinstance(s, str):
        return '-'
    candidate = s.strip()
    # fullmatch, unlike match() with '$', does not let a trailing newline
    # slip through and end up in the stored value.
    if re.fullmatch(r'\d+B|-', candidate):
        return candidate
    return '-'
def upload_file(files):
    """Return the ``name`` attribute of every uploaded file object.

    Args:
        files: Iterable of objects exposing a ``name`` attribute, or ``None``
            when nothing was uploaded.

    Returns:
        list of the ``name`` values, in input order; empty when ``files`` is
        ``None`` or empty.
    """
    if files is None:
        return []
    return [uploaded.name for uploaded in files]
def get_acc(data, subject_list):
    """Average the ``'jmmmu_acc,none'`` score of several subjects as a percentage.

    Args:
        data: Parsed result JSON; reads ``data["results"][subject]['jmmmu_acc,none']``.
        subject_list: Subject keys to average over (unweighted mean).

    Returns:
        The mean score multiplied by 100 and rounded to one decimal place.

    Raises:
        ValueError: ``subject_list`` is empty.
        KeyError: a subject or the metric key is missing from ``data``.
    """
    if not subject_list:
        raise ValueError("subject_list must contain at least one subject")
    # Plain left-to-right accumulation keeps the published numbers identical
    # to what this function has always produced.
    total = 0
    for subject in subject_list:
        total += data["results"][subject]['jmmmu_acc,none']
    return round(total / len(subject_list) * 100, 1)
def calculate_score(input_file):
    """Turn an uploaded evaluation JSON into the leaderboard score dictionary.

    Args:
        input_file: File content as UTF-8 bytes (a leading BOM is tolerated)
            or as an already-decoded ``str``.

    Returns:
        dict with the keys ``overall``, ``cultureSpecific``, ``cultureAgnostic``,
        ``japaneseArt``, ``japaneseHeritage``, ``japaneseHistory``,
        ``worldHistory``, ``artPsychology``, ``business``, ``science``,
        ``healthMedicine`` and ``techEngineering``. Every value is a
        ``'jmmmu_acc,none'`` score (or the mean of several) multiplied by 100
        and rounded to one decimal place.

    Raises:
        ValueError: the content is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
        KeyError: an expected entry under ``data["results"]`` is missing.
    """
    if isinstance(input_file, (bytes, bytearray)):
        # 'utf-8-sig' strips a BOM if present; json.loads rejects one.
        json_string = input_file.decode('utf-8-sig')
    else:
        json_string = input_file
    data = json.loads(json_string)
    results = data["results"]

    def reported_pct(group):
        # Aggregate groups are taken as reported in the file, not recomputed.
        return round(results[group]['jmmmu_acc,none'] * 100, 1)

    # Output key -> subjects averaged by get_acc. Insertion order fixes the
    # key order of the returned dict.
    subject_groups = {
        "japaneseArt": ["jmmmu_japanese_art"],
        "japaneseHeritage": ["jmmmu_japanese_heritage"],
        "japaneseHistory": ["jmmmu_japanese_history"],
        "worldHistory": ["jmmmu_world_history"],
        "artPsychology": ["jmmmu_design", "jmmmu_music", "jmmmu_psychology"],
        "business": [
            "jmmmu_accounting",
            "jmmmu_economics",
            "jmmmu_finance",
            "jmmmu_manage",
            "jmmmu_marketing",
        ],
        "science": ["jmmmu_biology", "jmmmu_chemistry", "jmmmu_physics", "jmmmu_math"],
        "healthMedicine": [
            "jmmmu_basic_medical_science",
            "jmmmu_clinical_medicine",
            "jmmmu_diagnostics_and_laboratory_medicine",
            "jmmmu_pharmacy",
            "jmmmu_public_health",
        ],
        "techEngineering": [
            "jmmmu_agriculture",
            "jmmmu_architecture_and_engineering",
            "jmmmu_computer_science",
            "jmmmu_electronics",
            "jmmmu_energy_and_power",
            "jmmmu_materials",
            "jmmmu_mechanical_engineering",
        ],
    }

    result_dict = {
        "overall": reported_pct("jmmmu"),
        "cultureSpecific": reported_pct("culture_specific"),
        "cultureAgnostic": reported_pct("culture_agnostic"),
    }
    for key, subjects in subject_groups.items():
        result_dict[key] = get_acc(data, subjects)
    return result_dict
def add_new_eval(
    input_file,
    model_type: str,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    model_size: str,
):
    """Score an uploaded result file and publish it to the leaderboard repo.

    The row is written to ``CSV_RESULT_PATH``, the raw upload is stored under
    ``CSV_QUEUE_DIR``, and both files are added, committed and pushed through
    the module-level ``repo``.

    Args:
        input_file: Raw bytes of the uploaded JSON file, or ``None``.
        model_type: Value for the first CSV column.
        model_name_textbox: Model name used when no revision name is given.
        revision_name_textbox: When non-empty, used as the model name; if a
            row with that name already exists it is overwritten, otherwise a
            new row is appended.
        model_link: Optional URL; when non-empty the stored name becomes the
            markdown link ``[name](url)``.
        model_size: Free text, normalised with ``validate_model_size``.

    Returns:
        ``0`` on success, otherwise the warning text describing the problem.

    Raises:
        FileNotFoundError: the results CSV does not exist after writing.
    """
    if input_file is None:
        warning_text = "Error! Empty file!"
        print(warning_text)
        return warning_text

    model_name_textbox = (model_name_textbox or '').strip()
    revision_name_textbox = (revision_name_textbox or '').strip()
    model_link = (model_link or '').strip()

    is_revision = revision_name_textbox != ''
    model_name = revision_name_textbox if is_revision else model_name_textbox
    if model_name == '':
        # Without a name the row is anonymous and the queue file is ".json".
        warning_text = "Error! Model name is empty!"
        print(warning_text)
        return warning_text

    model_size = validate_model_size(model_size)
    result_dict = calculate_score(input_file)

    # Sync with the remote BEFORE reading and editing the CSV, so the new row
    # is applied on top of the latest results instead of pulling into a
    # working tree that already has local modifications.
    repo.git_pull()
    csv_path = CSV_RESULT_PATH
    csv_data = pd.read_csv(csv_path)
    name_list = [_plain_model_name(name) for name in csv_data['Model']]

    if is_revision and model_name in name_list:
        row_index = name_list.index(model_name)  # overwrite the existing row
    else:
        row_index = csv_data.shape[0]  # append a new row

    model_name_wo_link = model_name
    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    # Field order must match the column order of the results CSV.
    new_data = [
        model_type,
        model_name,
        model_size,
        result_dict["overall"],
        result_dict["cultureSpecific"],
        result_dict["cultureAgnostic"],
        result_dict["japaneseArt"],
        result_dict["japaneseHeritage"],
        result_dict["japaneseHistory"],
        result_dict["worldHistory"],
        result_dict["artPsychology"],
        result_dict["business"],
        result_dict["science"],
        result_dict["healthMedicine"],
        result_dict["techEngineering"],
    ]

    # If the same data already exists, return an error.
    if new_data in csv_data.values.tolist():
        warning_text = "Error! The same data already exists!"
        print(warning_text)
        return warning_text
    # If the same model name already exists and no revision name was given,
    # return an error instead of silently adding a second row.
    if not is_revision and model_name_wo_link in name_list:
        warning_text = "Error! The same data already exists! Please fill revision_name."
        print(warning_text)
        return warning_text

    csv_data.loc[row_index] = new_data
    csv_data.to_csv(csv_path, index=False)

    absolute_result_path = os.path.abspath(csv_path)
    if not os.path.exists(absolute_result_path):
        raise FileNotFoundError(f"File {absolute_result_path} not found")
    repo.git_add(absolute_result_path)

    # Keep the raw upload next to the results; the name comes from user input,
    # so it is reduced to a safe single path component first.
    save_path = os.path.join(CSV_QUEUE_DIR, f"{_safe_file_stem(model_name_wo_link)}.json")
    with open(save_path, "wb") as f:
        f.write(input_file)
    repo.git_add(os.path.abspath(save_path))

    repo.git_commit(f"add {model_name_wo_link} results")
    repo.git_push()
    print(f"Success! Your {model_name_wo_link} has been added!")
    return 0


def _plain_model_name(cell):
    """Return the display name of a 'Model' cell, unwrapping ``[name](url)``."""
    text = str(cell)
    if text.startswith('[') and '](' in text:
        return text[1:text.index('](')]
    # Unlinked names are stored verbatim and must not lose their first character.
    return text


def _safe_file_stem(name):
    """Make ``name`` usable as one file-name component (no separators, no leading dots)."""
    stem = re.sub(r'[\\/]+', '_', name).lstrip('.')
    return stem or 'submission'
def get_baseline_df():
    """Load the leaderboard table restricted to the default visible columns.

    Pulls the dataset repo, reads ``CSV_RESULT_PATH``, orders rows by
    "Overall" (highest first) and keeps ``MODEL_INFO`` plus the columns listed
    in ``checkbox_group.value`` (the value the checkbox group was constructed
    with).

    Returns:
        pandas.DataFrame with the selected columns.
    """
    repo.git_pull()
    leaderboard = pd.read_csv(CSV_RESULT_PATH).sort_values(by="Overall", ascending=False)
    visible_columns = MODEL_INFO + checkbox_group.value
    return leaderboard[visible_columns]
def get_all_df():
    """Load the complete leaderboard table, best "Overall" score first.

    Pulls the dataset repo before reading ``CSV_RESULT_PATH`` so the table
    reflects the latest pushed results.

    Returns:
        pandas.DataFrame with every column of the results CSV.
    """
    repo.git_pull()
    return pd.read_csv(CSV_RESULT_PATH).sort_values(by="Overall", ascending=False)
# Gradio UI: a leaderboard tab, a submission tab, a refresh button and a
# citation box. The app is launched as soon as this module is executed.
# NOTE(review): the nesting of the `with` blocks is reconstructed from the
# order of the components; confirm it matches the intended layout.
block = gr.Blocks()
with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # table jmmmu bench
        with gr.TabItem("🏅 JMMMU Benchmark", elem_id="jmmmu-benchmark-tab-table", id=1):
            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

            baseline_value = get_baseline_df()
            baseline_header = MODEL_INFO + checkbox_group.value
            # NOTE(review): two 'markdown' entries assume MODEL_INFO holds two
            # columns - TODO confirm against constants.
            baseline_datatype = ['markdown'] * 2 + ['number'] * len(checkbox_group.value)
            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_columns):
                """Rebuild the table for the chosen size buckets and columns.

                Args:
                    selected_model_size: Checked entries of the "Model Size" group;
                        '-', '<10B' and '>=10B' are the values tested here.
                    selected_columns: Checked entries of "Evaluation Dimension".

                Returns:
                    A Dataframe component holding the filtered table, sorted by
                    the first selected column (highest first) when any column
                    is selected.
                """
                updated_data = get_all_df()

                # model_size
                def custom_filter(row, model_size_filters):
                    # str() keeps non-string cells (e.g. NaN from an empty CSV
                    # field) from raising; such rows are simply filtered out.
                    size_label = str(row['Model Size']).upper()
                    if size_label == '-':
                        return '-' in model_size_filters
                    if 'B' not in size_label:
                        return False
                    try:
                        size = float(size_label.replace('B', ''))
                    except ValueError:
                        return False
                    return ('>=10B' in model_size_filters and size >= 10) or (
                        '<10B' in model_size_filters and size < 10
                    )

                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size)
                updated_data = updated_data[mask]

                # columns: keep TASK_INFO order regardless of click order
                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns
                updated_data = updated_data[present_columns]
                # With every dimension unchecked there is nothing to sort by;
                # the rows keep the "Overall" order from get_all_df().
                if selected_columns:
                    updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)

                updated_headers = present_columns
                update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )
                return filter_component

            # Both controls refresh the same table. These bindings capture the
            # CheckboxGroup; `model_size` is rebound to a Textbox further down.
            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)
            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, checkbox_group], outputs=data_component)

        # table 5
        # NOTE(review): this tab reuses the elem_id of the leaderboard tab.
        with gr.TabItem("🚀 Submit here! ", elem_id="jmmmu-benchmark-tab-table", id=5):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_type = gr.Dropdown(
                        choices=["LMM", "LLM"],
                        label="Model type",
                        multiselect=False,
                        value="LMM",
                        interactive=True,
                    )
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    # From here on `model_size` is the submission Textbox.
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-', default is '-')"
                    )

                with gr.Column():
                    input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
                    submit_button = gr.Button("Submit Eval")

                    submission_result = gr.Markdown()
                    # No outputs are bound, so the value returned by
                    # add_new_eval is not displayed in the page.
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            input_file,
                            model_type,
                            model_name_textbox,
                            revision_name_textbox,
                            model_link,
                            model_size,
                        ],
                    )

    def refresh_data():
        """Reload the leaderboard with the default column selection."""
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()