import os
import json
import csv
import datetime
from email.utils import parseaddr

import gradio as gr
import pandas as pd
import numpy as np
from datasets import load_dataset
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from scorer import instruction_scorer
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
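
# All repos below are assumed to live under the ucla-contextual org on the Hub;
# TOKEN must be a Hub token with read access to the private datasets and write
# access to the submission, results, and contact repos plus the Space itself.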
TOKEN = os.environ.get("TOKEN", None)

OWNER = "ucla-contextual"
TEST_DATASET = f"{OWNER}/contextual_test"
VAL_DATASET = f"{OWNER}/contextual_val"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
CONTACT_DATASET = f"{OWNER}/contact_info"
RESULTS_DATASET = f"{OWNER}/results"
LEADERBOARD_PATH = f"{OWNER}/leaderboard"

api = HfApi()

YEAR_VERSION = "2024"
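
# Small JSON helpers: read a raw submission file and persist scored output
# before uploading it to the Hub.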
def read_json_file(filepath):
    with open(filepath) as infile:
        data_dict = json.load(infile)
    return data_dict

def save_json_file(filepath, data_dict):
    with open(filepath, "w") as outfile:
        json.dump(data_dict, outfile)
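
# Pull the benchmark splits, prior results, and contact info directly from
# their Hub datasets; force_redownload keeps the Space in sync with the repos.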
os.makedirs("scored", exist_ok=True)

test_data_files = {"test": "contextual_test.csv"}
test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

val_data_files = {"val": "contextual_val.csv"}
val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

contacts_data_files = {"contacts": "contacts.csv"}
contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

def get_dataframe_from_results(results, split):
    df = results[split].to_pandas()
    # Hide the URL column and rank models by their overall score
    df.drop(columns=["URL"], inplace=True)
    df = df.sort_values(by=["All"], ascending=False)
    return df

test_dataset_dataframe = test_dataset["test"].to_pandas()
val_dataset_dataframe = val_dataset["val"].to_pandas()
contacts_dataframe = contact_infos["contacts"].to_pandas()

val_results_dataframe = get_dataframe_from_results(results=results, split="val")
test_results_dataframe = get_dataframe_from_results(results=results, split="test")
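
# Called hourly by the scheduler at the bottom of this file; restarting the
# Space re-runs the module-level load_dataset calls above, refreshing the data.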
def restart_space():
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)

# Column datatypes for the leaderboard tables: three markdown columns
# (Model, Method, Organisation) followed by nine numeric score columns.
TYPES = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
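
# Submission flow: validate the form inputs, archive the raw file, score it
# against the val split, append the result to the public results CSV, and
# store the contact details privately.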
def add_new_eval(
    model: str,
    method: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    print("Received submission:", model, method, url, path_to_file, organisation, mail)

    if len(model) == 0:
        print("Rejected: empty model name")
        raise gr.Error("Please provide a model name. Field empty!")

    if len(method) == 0:
        print("Rejected: empty method")
        raise gr.Error("Please provide a method. Field empty!")

    if len(organisation) == 0:
        print("Rejected: empty organisation")
        raise gr.Error("Please provide organisation information. Field empty!")

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        print("Rejected: invalid email")
        raise gr.Error("Please provide a valid email address.")

    # Reject if this model/organisation combination was already submitted
    existing = {
        (m.lower(), o.lower())
        for m, o in zip(results["val"]["Model"], results["val"]["Organisation"])
    }
    if (model.lower(), organisation.lower()) in existing:
        print("Rejected: duplicate model/organisation combination")
        raise gr.Error("This model has already been submitted.")

    if path_to_file is None:
        print("Rejected: no file attached")
        raise gr.Error("Please attach a file.")

    tmp_file_output = read_json_file(path_to_file.name)

    if len(tmp_file_output.keys()) != 1:
        print("Rejected: wrong file format")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")

    tmp_output_key = list(tmp_file_output.keys())[0]
    if len(tmp_file_output[tmp_output_key].keys()) != 100:
        print("Rejected: wrong number of predictions")
        raise gr.Error("File must contain exactly 100 predictions.")
    # Save submitted file
    time_atm = datetime.datetime.today()
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_to_file.name,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )

    # Compute score
    file_path = path_to_file.name
    scores = instruction_scorer(val_dataset_dataframe, file_path, model)

    path_or_fileobj = f"scored/{organisation}_{model}.json"
    save_json_file(path_or_fileobj, scores)

    # Save scored file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_or_fileobj,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )
    # Actual submission
    eval_entry = {
        "Model": model,
        "Method": method,
        "Organisation": organisation,
        "URL": url,
        "All": scores["average"],
        "Time": scores["time"],
        "Shopping": scores["shopping"],
        "Navigation": scores["navigation-transportation"],
        "Abstract": scores["abstract"],
        "Application Usage": scores["app"],
        "Web Usage": scores["web"],
        "Infographic": scores["infographics"],
        "Miscellaneous Natural Scenes": scores["misc"],
    }
    val_results_dataframe = get_dataframe_from_results(results=results, split="val")
    val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
    val_results_dataframe.to_csv("contextual_val_results.csv", index=False)
    api.upload_file(
        repo_id=RESULTS_DATASET,
        path_or_fileobj="contextual_val_results.csv",
        path_in_repo="contextual_val_results.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    contact_info = {
        "Model": model,
        "URL": url,
        "Organisation": organisation,
        "Mail": mail,
    }
    contacts_dataframe = contact_infos["contacts"].to_pandas()
    contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
    contacts_dataframe.to_csv("contacts.csv", index=False)
    api.upload_file(
        repo_id=CONTACT_DATASET,
        path_or_fileobj="contacts.csv",
        path_in_repo="contacts.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    return format_log(f"Model {model} submitted by {organisation} successfully!\nPlease refresh the val leaderboard and allow a moment for the score to appear.")
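
# Re-download the results dataset so the tables reflect newly appended scores.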
def refresh():
    results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
    results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
    val_results_dataframe = get_dataframe_from_results(results=results, split="val")
    test_results_dataframe = get_dataframe_from_results(results=results, split="test")
    return val_results_dataframe, test_results_dataframe

def upload_file(files):
    file_paths = [file.name for file in files]
    return file_paths
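
# Gradio UI: collapsible docs, test/val leaderboard tabs, and the submission form.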
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    with gr.Row():
        with gr.Accordion("🧐 Introduction", open=False):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("🎯 Submission Guidelines", open=False):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.TextArea(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )
with gr.Tab("Results: Test"): | |
leaderboard_table_test = gr.components.Dataframe( | |
value=test_results_dataframe, datatype=TYPES, interactive=False, | |
column_widths=["20%"] | |
) | |
with gr.Tab("Results: Val"): | |
leaderboard_table_val = gr.components.Dataframe( | |
value=val_results_dataframe, datatype=TYPES, interactive=False, | |
column_widths=["20%"] | |
) | |
refresh_button = gr.Button("Refresh") | |
refresh_button.click( | |
refresh, | |
inputs=[], | |
outputs=[ | |
leaderboard_table_val, | |
leaderboard_table_test, | |
], | |
) | |
with gr.Accordion("Submit a new model for evaluation"): | |
with gr.Row(): | |
with gr.Column(): | |
model_name_textbox = gr.Textbox(label="Model name", type='text') | |
method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text') | |
url_textbox = gr.Textbox(label="URL to model information", type='text') | |
with gr.Column(): | |
organisation = gr.Textbox(label="Organisation", type='text') | |
mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email') | |
file_output = gr.File() | |
submit_button = gr.Button("Submit Eval") | |
submission_result = gr.Markdown() | |
submit_button.click( | |
add_new_eval, | |
[ | |
model_name_textbox, | |
method_textbox, | |
url_textbox, | |
file_output, | |
organisation, | |
], | |
submission_result, | |
) | |
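
# Restart the Space every hour so the module-level dataset loads above pick up
# any results merged outside this app.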
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

demo.launch(debug=True)