from typing import List
import gradio as gr
import numpy as np
import pandas as pd
from assets.text import INTRODUCTION_TEXT, METRICS_TEXT, EVALUTION_TEXT, ACKNOWLEDGEMENTS_TEXT, REFERENCE_TEXT
ORIGINAL_DF = pd.read_csv("./data/chinese_benchmark_gen.csv", sep='\t')  # tab-separated values
ORIGINAL_DF_PER = pd.read_csv("./data/chinese_benchmark_per.csv", sep='\t')  # tab-separated values
ORIGINAL_DF_SUB_GEN = pd.read_csv("./data/subclass_gen.csv", sep=',')  # comma-separated values
ORIGINAL_DF_SUB_PER = pd.read_csv("./data/subclass_per.csv", sep=',')
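# Expected schema (inferred from the lookups below): every CSV carries a "Size"
# column whose values match CLASSIFICATION["model_size"], plus a "Model" column;
# the subclass files additionally provide "<Subclass>_Accuracy",
# "<Subclass>_Precision", and "<Subclass>_Recall" columns for each subclass.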
METRICS = ["Accuracy", "Precision_Unsafe", "Recall_Unsafe", "Precision_Safe", "Recall_Safe", "None"]
SUBCLASS = ["Discrimination", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]
#SPLITS = ["Overall", "Subclass"]
SPLITS = ["Overall", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]
CLASSIFICATION = {
"model_size": [
">65B",
"~30B",
"10B~20B",
"5B~10B",
"API",
]
}
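# These size buckets are matched verbatim against the "Size" column via
# DataFrame.isin in the handlers below, so the strings must agree exactly
# with the CSV contents.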
_BIBTEX = """ Waiting for paper ... """
_LAST_UPDATED = "July 21, 2024"
banner_url = "./assets/logo.png"
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>' # noqa
def retrieve_array_from_text(text):
    # Parse a bracketed string such as "[0.1, 0.2]" into a float array.
    return np.fromstring(text.replace("[", "").replace("]", ""), dtype=float, sep=",")
def format_csv_numbers(text):
    # Keep only the value before the "/" in "value/std"-style cells.
    return text.split('/')[0]
def format_csv_numbers_second(text):
    # Split a whitespace-separated cell into its tokens.
    return text.split()
def format_number(x):
    # Round to three significant digits.
    return float(f"{x:.3}")
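# Example behaviour of the helpers above (values illustrative):
#   retrieve_array_from_text("[0.1, 0.2]")  -> array([0.1, 0.2])
#   format_csv_numbers("0.921/0.004")       -> "0.921"
#   format_number(0.12345)                  -> 0.123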
def get_dataset_csv(
model_size: List[str],
):
df = ORIGINAL_DF[ORIGINAL_DF['Size'].isin(model_size)]
df = df.drop(columns="Size")
# if metric_choice != "None":
# metric_choice = metric_choice + "/std"
# sort_basis = df[metric_choice].apply(format_csv_numbers)
# sorted_indices = sort_basis.argsort()[::-1]
# df = df.iloc[sorted_indices]
leaderboard_table = gr.components.Dataframe(
value=df,
interactive=False,
visible=True,
)
return leaderboard_table
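# Note: each handler returns a freshly constructed Dataframe component;
# Gradio applies a component returned from a callback as an update to the
# output component it is wired to below.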
def get_dataset_csv_per(
model_size: List[str],
):
df = ORIGINAL_DF_PER[ORIGINAL_DF_PER['Size'].isin(model_size)]
df = df.drop(columns="Size")
# if metric_choice != "None":
# metric_choice = metric_choice + "/std"
# sort_basis = df[metric_choice].apply(format_csv_numbers)
# sorted_indices = sort_basis.argsort()[::-1]
# df = df.iloc[sorted_indices]
leaderboard_table = gr.components.Dataframe(
value=df,
interactive=False,
visible=True,
)
return leaderboard_table
# Helper that builds the per-subclass table for the generation results.
def get_dataset_csv_sub_gen(
    model_size: List[str],
    subclass_choice: str,
):
df = ORIGINAL_DF_SUB_GEN[ORIGINAL_DF_SUB_GEN['Size'].isin(model_size)]
df = df.drop(columns="Size")
# get subclass
subclass_choice_label = ["Model", subclass_choice+"_Accuracy", subclass_choice+"_Precision", subclass_choice+"_Recall"]
df = df[subclass_choice_label]
# if metric_choice != "None":
# # metric_choice = metric_choice + "/std"
# metric_choice = metric_choice.split("_")[0]
# metric_choice = subclass_choice + "_" + metric_choice
# # sort_basis = df[metric_choice].apply(format_csv_numbers)
# sort_basis = df[metric_choice]
# sorted_indices = sort_basis.argsort()[::-1]
# df = df.iloc[sorted_indices]
leaderboard_table = gr.components.Dataframe(
value=df,
interactive=False,
visible=True,
)
return leaderboard_table
# Helper that builds the per-subclass table for the perplexity (Multiple Choice) results.
def get_dataset_csv_sub_per(
    model_size: List[str],
    subclass_choice: str,
):
df = ORIGINAL_DF_SUB_PER[ORIGINAL_DF_SUB_PER['Size'].isin(model_size)]
df = df.drop(columns="Size")
# get subclass
subclass_choice_label = ["Model", subclass_choice+"_Accuracy", subclass_choice+"_Precision", subclass_choice+"_Recall"]
df = df[subclass_choice_label]
# if metric_choice != "None":
# # metric_choice = metric_choice + "/std"
# metric_choice = metric_choice.split("_")[0]
# metric_choice = subclass_choice + "_" + metric_choice
# # sort_basis = df[metric_choice].apply(format_csv_numbers)
# sort_basis = df[metric_choice]
# sorted_indices = sort_basis.argsort()[::-1]
# df = df.iloc[sorted_indices]
leaderboard_table = gr.components.Dataframe(
value=df,
interactive=False,
visible=True,
)
return leaderboard_table
def get_dataset_classifier_gen(
    model_size: List[str],
    main_choice: str,
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv(model_size)
    else:
        # Every other entry in SPLITS is a subclass name.
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_gen(model_size, subclass_choice)
    return leaderboard_table
def get_dataset_classifier_per(
    model_size: List[str],
    main_choice: str,
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv_per(model_size)
    else:
        # Every other entry in SPLITS is a subclass name.
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_per(model_size, subclass_choice)
    return leaderboard_table
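# For example, get_dataset_classifier_gen([">65B", "API"], "Politics") renders
# the Politics_{Accuracy,Precision,Recall} columns for models in those buckets.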
with gr.Blocks() as demo:
gr.Markdown("<center><h1>ChineseSafe Leaderboard</h1></center>", elem_classes="markdown-text")
with gr.Row():
        #gr.Image(banner_url, height=160, scale=1) # 👉 this part is for image
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
# gr.Textbox(_INTRODUCTION_TEXT, scale=5)
with gr.Row():
gr.Markdown(METRICS_TEXT, elem_classes="markdown-text")
with gr.Row():
gr.Markdown(EVALUTION_TEXT, elem_classes="markdown-text")
with gr.Row():
with gr.Column(scale=0.8):
main_choice = gr.Dropdown(
choices=SPLITS,
value="Overall",
label="Type",
info="Please choose the type to display.",
)
# with gr.Column(scale=0.8):
# metric_choice = gr.Dropdown(
# choices=METRICS,
# value="None",
# label="Metric",
# info="Please choose the metric to display.",
# )
with gr.Column(scale=10):
model_choice = gr.CheckboxGroup(
choices=CLASSIFICATION["model_size"],
                value=CLASSIFICATION["model_size"],  # all sizes selected by default
label="Model Size",
info="Please choose the model size to display.",
)
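            # The .change callbacks at the bottom of the file re-render both
            # leaderboard tables whenever these controls change.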
# with gr.Column(scale=0.8):
# subclass_choice = gr.Dropdown(
# choices=SUBCLASS,
# value="Discrimination",
# label="Subclass",
# info="Please choose the subclass to display.",
# )
    # 👉 leaderboard tables (generation and multiple-choice results)
with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # with gr.TabItem("🏅 Overall Generation", elem_id="od-benchmark-tab-table", id=1):
        #     dataframe = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv table of perplexity results
        # with gr.TabItem("🏅 Overall Perplexity", elem_id="od-benchmark-tab-table", id=2):
        #     datafram_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv subclass table of generation results
        # with gr.TabItem("🏅 Subclass Generation", elem_id="od-benchmark-tab-table", id=3):
        #     dataframe_sub_gen = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv subclass table of perplexity results
        # with gr.TabItem("🏅 Subclass Perplexity", elem_id="od-benchmark-tab-table", id=4):
        #     dataframe_sub_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
# ----------------- modify text -----------------
with gr.TabItem("πŸ… Generation", elem_id="od-benchmark-tab-table", id=6):
dataframe_all_gen = gr.components.Dataframe(
elem_id="leaderboard-table",
)
with gr.TabItem("πŸ… Multiple Choice", elem_id="od-benchmark-tab-table", id=5):
dataframe_all_per = gr.components.Dataframe(
elem_id="leaderboard-table",
)
# ----------------- modify text -----------------
with gr.Row():
gr.Markdown(ACKNOWLEDGEMENTS_TEXT, elem_classes="markdown-text")
with gr.Row():
gr.Markdown(REFERENCE_TEXT, elem_classes="markdown-text")
gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
    # 👉 this part is for citation
# with gr.Row():
    # with gr.Accordion("📙 Citation", open=False):
# gr.Textbox(
# value=_BIBTEX,
# lines=7,
# label="Copy the BibTeX snippet to cite this source",
# elem_id="citation-button",
# show_copy_button=True
# )
    # callbacks for the generation-based overall table (disabled)
# metric_choice.change(
# get_dataset_csv,
# inputs=[model_choice, metric_choice],
# outputs=dataframe,
# )
# model_choice.change(
# get_dataset_csv,
# inputs=[model_choice, metric_choice],
# outputs=dataframe,
# )
# demo.load(
# fn=get_dataset_csv,
# inputs=[model_choice, metric_choice],
# outputs=dataframe,
# )
    # # callbacks for the perplexity-based overall table (disabled)
# metric_choice.change(
# get_dataset_csv_per,
# inputs=[model_choice, metric_choice],
# outputs=datafram_per,
# )
# model_choice.change(
# get_dataset_csv_per,
# inputs=[model_choice, metric_choice],
# outputs=datafram_per,
# )
# demo.load(
# fn=get_dataset_csv_per,
# inputs=[model_choice, metric_choice],
# outputs=datafram_per,
# )
    # callbacks for the per-subclass generation table (disabled)
# metric_choice.change(
# get_dataset_csv_sub_gen,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_gen,
# )
# model_choice.change(
# get_dataset_csv_sub_gen,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_gen,
# )
# subclass_choice.change(
# get_dataset_csv_sub_gen,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_gen,
# )
# demo.load(
# fn=get_dataset_csv_sub_gen,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_gen,
# )
    # # callbacks for the per-subclass perplexity table (disabled)
# # metric_choice.change(
# # get_dataset_csv_sub_per,
# # inputs=[model_choice, metric_choice, subclass_choice],
# # outputs=dataframe_sub_per,
# # )
# model_choice.change(
# get_dataset_csv_sub_per,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_per,
# )
# subclass_choice.change(
# get_dataset_csv_sub_per,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_per,
# )
# demo.load(
# fn=get_dataset_csv_sub_per,
# inputs=[model_choice, metric_choice, subclass_choice],
# outputs=dataframe_sub_per,
# )
# --------------------------- all --------------------------------
    # live callbacks: perplexity-based (Multiple Choice) results
    main_choice.change(
        get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    model_choice.change(
        get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    # metric_choice.change(
    #     get_dataset_classifier_per,
    #     inputs=[model_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    # subclass_choice.change(
    #     get_dataset_classifier_per,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    demo.load(
        fn=get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    # live callbacks: generation results
    main_choice.change(
        get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    model_choice.change(
        get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    # metric_choice.change(
    #     get_dataset_classifier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    # subclass_choice.change(
    #     get_dataset_classifier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    demo.load(
        fn=get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
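    # demo.load populates both tables once at startup with the default control
    # values; afterwards the .change callbacks above keep them in sync.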
demo.launch()