from typing import List

import gradio as gr
import numpy as np
import pandas as pd

from assets.text import INTRODUCTION_TEXT, METRICS_TEXT, EVALUTION_TEXT, ACKNOWLEDGEMENTS_TEXT, REFERENCE_TEXT
ORIGINAL_DF = pd.read_csv("./data/chinese_benchmark_gen.csv", sep='\t')      # tab-separated values
ORIGINAL_DF_PER = pd.read_csv("./data/chinese_benchmark_per.csv", sep='\t')  # tab-separated values
ORIGINAL_DF_SUB_GEN = pd.read_csv("./data/subclass_gen.csv", sep=',')        # comma-separated values
ORIGINAL_DF_SUB_PER = pd.read_csv("./data/subclass_per.csv", sep=',')
METRICS = ["Accuracy", "Precision_Unsafe", "Recall_Unsafe", "Precision_Safe", "Recall_Safe", "None"]
SUBCLASS = ["Discrimination", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]
# SPLITS = ["Overall", "Subclass"]
SPLITS = ["Overall", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]
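# SPLITS drives the "Type" dropdown below: "Overall" shows the full table, and
# any other entry selects that subclass's columns. Note that SPLITS currently
# omits "Discrimination", which does appear in SUBCLASS.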
CLASSIFICATION = {
    "model_size": [
        ">65B",
        "~30B",
        "10B~20B",
        "5B~10B",
        "API",
    ]
}
_BIBTEX = """ Waiting for paper ... """
_LAST_UPDATED = "July 21, 2024"

banner_url = "./assets/logo.png"
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'  # noqa
def retrieve_array_from_text(text):
    return np.fromstring(text.replace("[", "").replace("]", ""), dtype=float, sep=",")


def format_csv_numbers(text):
    return text.split('/')[0]


def format_csv_numbers_second(text):
    return text.split()


def format_number(x):
    return float(f"{x:.3}")
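# Illustrative examples of the helpers above (the input values are hypothetical,
# not taken from the leaderboard data):
#   retrieve_array_from_text("[0.1, 0.2, 0.3]")  ->  array([0.1, 0.2, 0.3])
#   format_csv_numbers("85.2/0.4")               ->  "85.2"  (keeps the value, drops the "/std" part)
#   format_number(0.87654)                       ->  0.877   (3 significant digits)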
def get_dataset_csv(
    model_size: List[str],
):
    df = ORIGINAL_DF[ORIGINAL_DF['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # if metric_choice != "None":
    #     metric_choice = metric_choice + "/std"
    #     sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table
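# Usage sketch (illustrative, not called directly anywhere in this app): build
# the overall generation table for two size buckets. The labels must match the
# "Size" column of chinese_benchmark_gen.csv.
#   table = get_dataset_csv([">65B", "API"])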
def get_dataset_csv_per(
    model_size: List[str],
):
    df = ORIGINAL_DF_PER[ORIGINAL_DF_PER['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # if metric_choice != "None":
    #     metric_choice = metric_choice + "/std"
    #     sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table
# subclass table for the generation results
def get_dataset_csv_sub_gen(
    model_size: List[str],
    subclass_choice: List[str],
):
    df = ORIGINAL_DF_SUB_GEN[ORIGINAL_DF_SUB_GEN['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # keep only the columns of the chosen subclass
    subclass_choice_label = ["Model", subclass_choice + "_Accuracy", subclass_choice + "_Precision", subclass_choice + "_Recall"]
    df = df[subclass_choice_label]

    # if metric_choice != "None":
    #     # metric_choice = metric_choice + "/std"
    #     metric_choice = metric_choice.split("_")[0]
    #     metric_choice = subclass_choice + "_" + metric_choice
    #     # sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sort_basis = df[metric_choice]
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table
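# Column-selection example (follows directly from the code above): with
# subclass_choice = "Politics", the table keeps exactly
#   ["Model", "Politics_Accuracy", "Politics_Precision", "Politics_Recall"]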
# subclass table for the perplexity (multiple-choice) results
def get_dataset_csv_sub_per(
    model_size: List[str],
    subclass_choice: List[str],
):
    df = ORIGINAL_DF_SUB_PER[ORIGINAL_DF_SUB_PER['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # keep only the columns of the chosen subclass
    subclass_choice_label = ["Model", subclass_choice + "_Accuracy", subclass_choice + "_Precision", subclass_choice + "_Recall"]
    df = df[subclass_choice_label]

    # if metric_choice != "None":
    #     # metric_choice = metric_choice + "/std"
    #     metric_choice = metric_choice.split("_")[0]
    #     metric_choice = subclass_choice + "_" + metric_choice
    #     # sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sort_basis = df[metric_choice]
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table
def get_dataset_classifier_gen(
    model_size: List[str],
    main_choice: List[str],
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv(model_size)
    else:  # any subclass name from SPLITS
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_gen(model_size, subclass_choice)
    return leaderboard_table
def get_dataset_classifier_per(
    model_size: List[str],
    main_choice: List[str],
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv_per(model_size)
    else:  # any subclass name from SPLITS
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_per(model_size, subclass_choice)
    return leaderboard_table
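# Quick sanity check (illustrative; assumes the CSV files loaded at the top are
# present and use these size labels):
#   get_dataset_classifier_gen([">65B", "API"], "Overall")   # full generation table
#   get_dataset_classifier_gen([">65B", "API"], "Politics")  # Politics columns only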
with gr.Blocks() as demo:
    gr.Markdown("<center><h1>ChineseSafe Leaderboard</h1></center>", elem_classes="markdown-text")

    with gr.Row():
        # gr.Image(banner_url, height=160, scale=1)  # this part is for the banner image
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        # gr.Textbox(_INTRODUCTION_TEXT, scale=5)

    with gr.Row():
        gr.Markdown(METRICS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        gr.Markdown(EVALUTION_TEXT, elem_classes="markdown-text")
    with gr.Row():
        with gr.Column(scale=0.8):
            main_choice = gr.Dropdown(
                choices=SPLITS,
                value="Overall",
                label="Type",
                info="Please choose the type to display.",
            )
        # with gr.Column(scale=0.8):
        #     metric_choice = gr.Dropdown(
        #         choices=METRICS,
        #         value="None",
        #         label="Metric",
        #         info="Please choose the metric to display.",
        #     )
        with gr.Column(scale=10):
            model_choice = gr.CheckboxGroup(
                choices=CLASSIFICATION["model_size"],
                value=CLASSIFICATION["model_size"],  # all sizes selected by default
                label="Model Size",
                info="Please choose the model size to display.",
            )
        # with gr.Column(scale=0.8):
        #     subclass_choice = gr.Dropdown(
        #         choices=SUBCLASS,
        #         value="Discrimination",
        #         label="Subclass",
        #         info="Please choose the subclass to display.",
        #     )
    # this part is for the csv result tables
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # with gr.TabItem("Overall Generation", elem_id="od-benchmark-tab-table", id=1):
        #     dataframe = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # this part is for the csv table (perplexity)
        # with gr.TabItem("Overall Perplexity", elem_id="od-benchmark-tab-table", id=2):
        #     datafram_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # this part is for the csv subclass table (generation)
        # with gr.TabItem("Subclass Generation", elem_id="od-benchmark-tab-table", id=3):
        #     dataframe_sub_gen = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # this part is for the csv subclass table (perplexity)
        # with gr.TabItem("Subclass Perplexity", elem_id="od-benchmark-tab-table", id=4):
        #     dataframe_sub_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # ----------------- modify text -----------------
        with gr.TabItem("Generation", elem_id="od-benchmark-tab-table", id=6):
            dataframe_all_gen = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )
        with gr.TabItem("Multiple Choice", elem_id="od-benchmark-tab-table", id=5):
            dataframe_all_per = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )
        # ----------------- modify text -----------------
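        # The two tabs above are the only live views; both are populated by the
        # .change/.load handlers registered at the bottom of the file.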
    with gr.Row():
        gr.Markdown(ACKNOWLEDGEMENTS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        gr.Markdown(REFERENCE_TEXT, elem_classes="markdown-text")

    gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
    # this part is for the citation
    # with gr.Row():
    #     with gr.Accordion("Citation", open=False):
    #         gr.Textbox(
    #             value=_BIBTEX,
    #             lines=7,
    #             label="Copy the BibTeX snippet to cite this source",
    #             elem_id="citation-button",
    #             show_copy_button=True,
    #         )
    # results based on generation
    # metric_choice.change(
    #     get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )
    # model_choice.change(
    #     get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )
    # demo.load(
    #     fn=get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )

    # # results based on perplexity
    # metric_choice.change(
    #     get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )
    # model_choice.change(
    #     get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )
    # demo.load(
    #     fn=get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )
    # subclass results (generation)
    # metric_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # model_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # subclass_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # demo.load(
    #     fn=get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )

    # # subclass results (perplexity)
    # # metric_choice.change(
    # #     get_dataset_csv_sub_per,
    # #     inputs=[model_choice, metric_choice, subclass_choice],
    # #     outputs=dataframe_sub_per,
    # # )
    # model_choice.change(
    #     get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
    # subclass_choice.change(
    #     get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
    # demo.load(
    #     fn=get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
    # --------------------------- all --------------------------------
    # all results: perplexity (multiple choice)
    main_choice.change(
        get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    model_choice.change(
        get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    # metric_choice.change(
    #     get_dataset_classifier_per,
    #     inputs=[model_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    # subclass_choice.change(
    #     get_dataset_classifier_per,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    demo.load(
        fn=get_dataset_classifier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
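    # demo.load fires once per page load, so the tables are populated with the
    # default selections before the user touches any control.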
    # all results: generation
    main_choice.change(
        get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    model_choice.change(
        get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    # metric_choice.change(
    #     get_dataset_classifier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    # subclass_choice.change(
    #     get_dataset_classifier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    demo.load(
        fn=get_dataset_classifier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
demo.launch()