# MMDT-radar / generate_plot.py
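# Plotly helpers for MM-DecodingTrust (MMDT) figures: a main radar chart of
# per-perspective scores and per-perspective breakdown charts, each paired
# with a table of the underlying numbers.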
import plotly.colors
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import argparse
from utils.score_extract.ood_agg import ood_t2i_agg, ood_i2t_agg
DEFAULT_PLOTLY_COLORS = plotly.colors.DEFAULT_PLOTLY_COLORS
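# Convert a Plotly 'rgb(r, g, b)' color string to 'rgba(r, g, b, alpha)' so a
# trace's translucent fill can reuse its line color,
# e.g. to_rgba('rgb(31, 119, 180)', 0.1) -> 'rgba(31, 119, 180, 0.1)'.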
def to_rgba(rgb, alpha=1):
return 'rgba' + rgb[3:][:-1] + f', {alpha})'
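# Radar (Scatterpolar) chart of per-benchmark scores for the selected models,
# with a table of the raw numbers underneath.
#   results: {model_basename: {benchmark: score}}
#   thetas: benchmark labels for the angular axis (may contain "<br>")
#   selected_models: model identifiers; only the basename after "/" is used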
def radar_plot(results, thetas, selected_models):
# Extract performance values for each model across all benchmarks
model_performance = {}
selected_models = [os.path.basename(model) for model in selected_models]
for model in selected_models:
if model in results:
benchmarks_data = results[model]
model_performance[model] = [benchmarks_data[subfield] for subfield in benchmarks_data.keys()]
# Create radar chart with plotly
fig = make_subplots(
rows=2, cols=1,
shared_xaxes=True,
vertical_spacing=0.2,
row_heights=[1, 0.4],
specs=[[{"type": "polar"}], [{"type": "table"}]]
)
for i, (model, performance) in enumerate(model_performance.items()):
color = DEFAULT_PLOTLY_COLORS[i % len(DEFAULT_PLOTLY_COLORS)]
fig.add_trace(
go.Scatterpolar(
r=performance + [performance[0]],
theta=thetas + [thetas[0]],
fill='toself',
connectgaps=True,
fillcolor=to_rgba(color, 0.1),
name=model.split('/')[-1], # Use the last part of the model name for clarity
),
row=1, col=1
)
    header_texts = ["Model"] + [x.replace("<br>", " ") for x in thetas]
    # go.Table expects column-major values: model names first, then one column of rounded scores per benchmark.
    model_names = list(model_performance.keys())
    rows = [model_names] + [[round(model_performance[m][i], 2) for m in model_names] for i in range(len(thetas))]
# column_widths = [len(x) for x in header_texts]
# column_widths[0] *= len(thetas)
fig.add_trace(
go.Table(
header=dict(values=header_texts, font=dict(size=14.5), align="left"),
cells=dict(
values=rows,
align="left",
font=dict(size=14.5),
height=30
),
# columnwidth=column_widths
),
row=2, col=1
)
fig.update_layout(
height=900,
legend=dict(font=dict(size=20), orientation="h", xanchor="center", x=0.5, y=0.35),
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 100], # Assuming accuracy is a percentage between 0 and 100
tickfont=dict(size=12)
),
angularaxis=dict(tickfont=dict(size=20), type="category")
),
showlegend=True,
# title=f"{title}"
)
return fig
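# Main radar chart of per-perspective scores (plus score table) across the
# selected models; scores are assumed to be percentages in [0, 100].
#   main_scores: {model_basename: {perspective: score}}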
def main_radar_plot(main_scores, selected_models):
fig = make_subplots(
rows=2, cols=1,
shared_xaxes=True,
vertical_spacing=0.2,
row_heights=[1.0, 0.5],
specs=[[{"type": "polar"}], [{"type": "table"}]]
)
model_scores = {}
for model in selected_models:
model_name = os.path.basename(model)
model_scores[model_name] = main_scores[model_name]
perspectives = list(model_scores[os.path.basename(selected_models[0])].keys())
perspectives_shift = perspectives
for i, model_name in enumerate(model_scores.keys()):
color = DEFAULT_PLOTLY_COLORS[i % len(DEFAULT_PLOTLY_COLORS)]
score_shifted = list(model_scores[model_name].values())
fig.add_trace(
go.Scatterpolar(
r=score_shifted + [score_shifted[0]],
theta=perspectives_shift + [perspectives_shift[0]],
connectgaps=True,
fill='toself',
fillcolor=to_rgba(color, 0.1),
name=model_name, # Use the last part of the model name for clarity
),
row=1, col=1
)
header_texts = ["Model"] + perspectives
rows = [
list(model_scores.keys()), # Model Names
*[[round(score[perspective], 2) for score in list(model_scores.values())] for perspective in perspectives]
]
column_widths = [10] + [5] * len(perspectives)
fig.add_trace(
go.Table(
header=dict(values=header_texts, font=dict(size=14.5), align="left"),
cells=dict(
values=rows,
align="left",
font=dict(size=14.5),
height=30,
),
columnwidth=column_widths,
),
row=2, col=1
)
fig.update_layout(
height=1200,
legend=dict(font=dict(size=20), orientation="h", xanchor="center", x=0.5, y=0.4),
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 100], # Assuming accuracy is a percentage between 0 and 100
tickfont=dict(size=12)
),
angularaxis=dict(tickfont=dict(size=20), type="category", rotation=5)
),
showlegend=True,
title=dict(text="MM-DecodingTrust Scores (Higher is Better)"),
)
return fig
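# A perspective breakdown is just a radar_plot over that perspective's
# sub-scenario scores.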
def breakdown_plot(scenario_results, subfields, selected_models):
fig = radar_plot(scenario_results, subfields, selected_models)
return fig
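# Rename a single model's perspective keys to their display names taken from
# config_dicts[perspective]["name"] and return the renamed main scores.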
def update_subscores(target_model, main_scores, config_dicts):
target_model = target_model.split('/')[-1]
curr_main_scores = {}
curr_main_scores[target_model] = {}
for perspective in main_scores[target_model].keys():
curr_main_scores[target_model][config_dicts[perspective]["name"]] = main_scores[target_model][perspective]
return curr_main_scores
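# Render and save the main radar figure plus one breakdown figure per
# perspective (where the config enables "sub_plot") for a single model.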
def generate_plot(model, main_scores, sub_scores, config_dict, out_path="plots"):
    curr_main_scores = update_subscores(model, main_scores, config_dict)
    os.makedirs(out_path, exist_ok=True)  # write_image fails if the output directory does not exist
    for perspective in config_dict:
        if not config_dict[perspective]["sub_plot"]:
            continue
# if "openai/gpt-4-0314" not in sub_scores[perspective].keys():
# model_list = [model]
# else:
# model_list = [model, "openai/gpt-4-0314"]
model_list = [model]
subplot = breakdown_plot(sub_scores[perspective], list(sub_scores[perspective][model].keys()), model_list)
perspective_name = config_dict[perspective]["name"].replace(" ", "_")
subplot.write_image(f"{out_path}/{perspective_name}_breakdown.png", width=1400, height=700)
plot = main_radar_plot(curr_main_scores, [model])
plot.write_image(f"{out_path}/main.png", width=1400, height=700)
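# Convenience wrappers that build, but do not save, the main figure and a
# single perspective's breakdown figure for a list of models.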
def generate_main_plot(models, main_scores):
    plot = main_radar_plot(main_scores, models)
return plot
# plot.write_image(f"{out_path}/main.png", width=1400, height=700)
def generate_sub_plot(models, sub_scores, perspective):
subplot = breakdown_plot(sub_scores[perspective], list(sub_scores[perspective][models[0]].keys()), models)
return subplot
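# Ad-hoc driver: aggregate OOD results for a few text-to-image and
# image-to-text models (reused as placeholders for every perspective) and
# build one breakdown figure.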
if __name__ == "__main__":
# parser = argparse.ArgumentParser()
# parser.add_argument("--model", type=str, default="hf/meta-llama/Llama-2-7b-chat-hf")
# args = parser.parse_args()
    t2i_models = [  # average time per generated example in seconds, where noted
"dall-e-2",
"dall-e-3",
"DeepFloyd/IF-I-M-v1.0", # 15.372
"dreamlike-art/dreamlike-photoreal-2.0", # 3.526
"prompthero/openjourney-v4", # 4.981
"stabilityai/stable-diffusion-xl-base-1.0", # 7.463
]
    i2t_models = [  # average time per example in seconds, where noted
"gpt-4-vision-preview",
"gpt-4o-2024-05-13",
"llava-hf/llava-v1.6-vicuna-7b-hf"
]
perspectives = ["Safety", "Fairness", "Hallucination", "Privacy", "Adv", "OOD"]
main_scores_t2i = {}
main_scores_i2t = {}
sub_scores_t2i = {}
sub_scores_i2t = {}
for model in t2i_models:
model = model.split("/")[-1]
main_scores_t2i[model] = {}
for perspective in perspectives:
            # Placeholder: reuse the OOD aggregate score for every perspective
main_scores_t2i[model][perspective] = ood_t2i_agg(model, "./data/results")["score"]
if perspective not in sub_scores_t2i.keys():
sub_scores_t2i[perspective] = {}
sub_scores_t2i[perspective][model] = ood_t2i_agg(model, "./data/results")["subscenarios"]
for model in i2t_models:
model = model.split("/")[-1]
main_scores_i2t[model] = {}
for perspective in perspectives:
            # Placeholder: reuse the OOD aggregate score for every perspective
main_scores_i2t[model][perspective] = ood_i2t_agg(model, "./data/results")["score"]
if perspective not in sub_scores_i2t.keys():
sub_scores_i2t[perspective] = {}
sub_scores_i2t[perspective][model] = ood_i2t_agg(model, "./data/results")["subscenarios"]
# generate_main_plot(t2i_models, main_scores_t2i)
# generate_main_plot(i2t_models, main_scores_i2t)
    generate_sub_plot(t2i_models, sub_scores_t2i, "OOD")
# generate_sub_plot(i2t_models, sub_scores_i2t)