Spaces:

flowers-team
/

SocialAISchool

Sleeping

App Files Files Community

SocialAISchool / data_analysis_neurips.py

grg

Cleaned old git history

be5548b about 1 year ago

raw

history blame

22.4 kB

	#!/usr/bin/env python
import json
import os
import sys
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from termcolor import cprint

	# Load data

	# Global vars for tracking and labeling data at load time.
	exp_idx = 0
	label_parser_dict = None

	smooth_factor = 10
	leg_size = 30

	subsample_step = 1
	load_subsample_step = 50

	default_colors = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
	"cyan", "pink",'purple', u'#1f77b4',
	"darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
	'deeppink','silver','khaki','goldenrod','y','y','y','y','y','y','y','y','y','y','y','y' ] + ['y']*50

	def get_all_runs(logdir, load_subsample_step=1):
	"""
	Recursively look through logdir for output files produced by
	Assumes that any file "progress.txt" is a valid hit.
	"""
	global exp_idx
	global units
	datasets = []
	for root, _, files in os.walk(logdir):
	if 'log.csv' in files:
	run_name = root[8:]
	exp_name = None

	# try to load a config file containing hyperparameters
	config = None
	try:
	config_path = open(os.path.join(root,'config.json'))
	config = json.load(config_path)
	if 'exp_name' in config:
	exp_name = config['exp_name']
	except:
	print('No file named config.json')

	exp_idx += 1

	# load progress data
	try:
	print(os.path.join(root,'log.csv'))
	exp_data = pd.read_csv(os.path.join(root,'log.csv'))
	except:
	raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))

	exp_data = exp_data[::load_subsample_step]
	data_dict = exp_data.to_dict("list")

	data_dict['config'] = config
	nb_epochs = len(data_dict['frames'])
	print('{} -> {}'.format(run_name, nb_epochs))


	datasets.append(data_dict)

	return datasets

	def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_pattern="ignore"):
	_, models_list, _ = next(os.walk(rootdir))
	print(models_list)
	for dir_name in models_list.copy():
	# add "ignore" in a directory name to avoid loading its content
	if ignore_pattern in dir_name or load_only not in dir_name:
	models_list.remove(dir_name)
	for expe_name in list(labels.keys()):
	if expe_name not in models_list:
	del labels[expe_name]

	# setting per-model type colors
	for i,m_name in enumerate(models_list):
	for m_type, m_color in per_model_colors.items():
	if m_type in m_name:
	colors[m_name] = m_color
	print("extracting data for {}...".format(m_name))
	m_id = m_name
	models_saves[m_id] = OrderedDict()
	models_saves[m_id]['data'] = get_all_runs(rootdir+m_name, load_subsample_step=load_subsample_step)
	print("done")
	if m_name not in labels:
	labels[m_name] = m_name

	"""
	retrieve all experiences located in "data to vizu" folder
	"""
	labels = OrderedDict()
	per_model_colors = OrderedDict()
	# per_model_colors = OrderedDict([('ALP-GMM',u'#1f77b4'),
	# ('hmn','pink'),
	# ('ADR','black')])

	# LOAD DATA
	models_saves = OrderedDict()
	colors = OrderedDict()

	static_lines = {}
	# get_datasets("storage/",load_only="RERUN_WizardGuide")
	# get_datasets("storage/",load_only="RERUN_WizardTwoGuides")
	try:
	figure_id = eval(sys.argv[1])
	except:
	figure_id = sys.argv[1]

	print("fig:", figure_id)
	if figure_id == 0:
	# train change
	env_type = "No_NPC_environment"
	fig_type = "train"

	get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_no_explo", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_curr_dial", load_subsample_step=load_subsample_step)
	top_n = 16
	elif figure_id == 1:
	# arch change
	env_type = "No_NPC_environment"
	fig_type = "arch"

	get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_bow", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_no_mem", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_bigru", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardGuide_lang64_attgru", load_subsample_step=load_subsample_step)
	top_n = 16
	elif figure_id == 2:
	# train change FULL
	env_type = "FULL_environment"
	fig_type = "train"

	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_explo", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_curr_dial", load_subsample_step=load_subsample_step)
	top_n = 16
	elif figure_id == 3:
	# arch change FULL
	env_type = "FULL_environment"
	fig_type = "arch"

	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bow", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_mem", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bigru", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_attgru", load_subsample_step=load_subsample_step)
	top_n = 16
	elif str(figure_id) == "ShowMe":

	get_datasets("storage/", "20-05_NeurIPS_ShowMe_ABL_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
	get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_ABL", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "20-05_NeurIPS_ShowMe_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
	get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_env", load_subsample_step=load_subsample_step)

	label_parser_dict = {
	"20-05_NeurIPS_ShowMe_ABL_CEB" : "ShowMe_exp_bonus_no_social_skills_required",
	"20-05_NeurIPS_ShowMe_NO_BONUS_ABL" : "ShowMe_no_bonus_no_social_skills_required",
	"20-05_NeurIPS_ShowMe_CEB" : "ShowMe_exp_bonus",
	"20-05_NeurIPS_ShowMe_NO_BONUS_env" : "ShowMe_no_bonus",
	}

	env_type = str(figure_id)

	fig_type = "test"
	top_n = 16

	elif str(figure_id) == "Help":

	# env_type = "Bobo"
	# get_datasets("storage/", "Bobo")
	get_datasets("storage/", "24-05_NeurIPS_Help", load_subsample_step=load_subsample_step, ignore_pattern="ABL")
	# get_datasets("storage/", "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)

	label_parser_dict = {
	"Help_NO_BONUS_env": "PPO",
	"Help_BONUS_env": "PPO+Explo",
	# "Help_NO_BONUS_ABL_env": "ExiterRole_no_bonus_no_NPC",
	# "Help_BONUS_ABL_env": "ExiterRole_bonus_no_NPC",
	"26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env": "Unsocial PPO",
	# "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL": "ExiterRole_Insocial_ABL"
	}

	static_lines = {
	"PPO (helper)": (0.12, 0.05, "#1f77b4"),
	"PPO+Explo (helper)": (0.11, 0.04, "indianred"),
	# "Help_exp_bonus": (0.11525, 0.04916 , default_colors[2]),
	# "HelperRole_ABL_no_exp_bonus": (0.022375, 0.01848, default_colors[3]),
	"Unsocial PPO (helper)": (0.15, 0.06, "grey"),
	# "HelperRole_ABL_Insocial": (0.01775, 0.010544, default_colors[4]),
	}

	env_type = str(figure_id)

	fig_type = "test"
	top_n = 16

	elif str(figure_id) == "TalkItOut":
	print("You mean Polite")
	exit()

	elif str(figure_id) == "TalkItOutPolite":
	# env_type = "TalkItOut"
	# get_datasets("storage/", "ORIENT_env_MiniGrid-TalkItOut")

	# env_type = "GuideThief"
	# get_datasets("storage/", "GuideThief")

	# env_type = "Bobo"
	# get_datasets("storage/", "Bobo")
	get_datasets("storage/", "20-05_NeurIPS_TalkItOutPolite", load_subsample_step=load_subsample_step)
	# get_datasets("storage/", "21-05_NeurIPS_small_bonus_TalkItOutPolite")
	get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar", load_subsample_step=load_subsample_step)

	label_parser_dict = {
	"TalkItOutPolite_NO_BONUS_env": "PPO",
	"TalkItOutPolite_e": "PPO+Explo",
	"TalkItOutPolite_NO_BONUS_NoLiar": "PPO (no liar)",
	"TalkItOutPolite_NoLiar_e": "PPO+Explo (no liar)",
	"26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env": "Unsocial PPO",
	"26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar": "Unsocial PPO (no liar)",
	}


	env_type = str(figure_id)

	fig_type = "test"
	top_n = 16

	elif str(figure_id) == "DiverseExit":
	get_datasets("storage/", "24-05_NeurIPS_DiverseExit", load_subsample_step=load_subsample_step)
	get_datasets("storage/", "26-05_NeurIPS_gpu_DiverseExit", load_subsample_step=load_subsample_step)

	label_parser_dict = {
	"DiverseExit_NO_BONUS": "No_bonus",
	"DiverseExit_BONUS": "BOnus",
	"gpu_DiverseExit_NoSocial": "No_social",
	}

	env_type = str(figure_id)

	fig_type = "test"
	top_n = 16

	else:
	get_datasets("storage/", str(figure_id), load_subsample_step=load_subsample_step)

	env_type = str(figure_id)

	fig_type = "test"
	top_n = 8

	#### get_datasets("storage/", "RERUN_WizardGuide_lang64_nameless")
	#### get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_nameless")


	if per_model_colors: # order runs for legend order as in per_models_colors, with corresponding colors
	ordered_labels = OrderedDict()
	for teacher_type in per_model_colors.keys():
	for k,v in labels.items():
	if teacher_type in k:
	ordered_labels[k] = v
	labels = ordered_labels
	else:
	print('not using per_model_color')
	for k in models_saves.keys():
	labels[k] = k

	def plot_with_shade(subplot_nb, ax,x,y,err,color,shade_color,label,
	y_min=None,y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
	ylim=[0,100], xlim=[0,40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
	zorder=None, xlabel='perf',ylabel='env steps'):
	#plt.rcParams.update({'font.size': 15})
	ax.locator_params(axis='x', nbins=4)
	ax.locator_params(axis='y', nbins=3)
	ax.tick_params(axis='both', which='major', labelsize=ticksize)
	ax.plot(x,y, color=color, label=label,linewidth=linewidth,zorder=zorder)
	ax.fill_between(x,y-err,y+err,color=shade_color,alpha=0.2)
	if legend:
	leg = ax.legend(loc=leg_loc, **leg_args) #34
	for legobj in leg.legendHandles:
	legobj.set_linewidth(leg_linewidth)
	ax.set_xlabel(xlabel, fontsize=30)
	if subplot_nb == 0:
	ax.set_ylabel(ylabel, fontsize=30,labelpad=-4)
	ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
	ax.set_ylim(bottom=ylim[0],top=ylim[1])
	if title:
	ax.set_title(title, fontsize=22)
	# Plot utils
	def plot_with_shade_grg(subplot_nb, ax,x,y,err,color,shade_color,label,
	y_min=None,y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
	ylim=[0,100], xlim=[0,40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
	zorder=None, xlabel='perf',ylabel='env steps', linestyle="-"):
	#plt.rcParams.update({'font.size': 15})
	ax.locator_params(axis='x', nbins=4)
	ax.locator_params(axis='y', nbins=3)
	ax.tick_params(axis='both', which='major', labelsize=ticksize)


	ax.plot(x, y, color=color, label=label,linewidth=linewidth,zorder=zorder, linestyle=linestyle)
	ax.fill_between(x, y-err, y+err,color=shade_color,alpha=0.2)
	if legend:
	leg = ax.legend(loc=leg_loc, **leg_args) #34
	for legobj in leg.legendHandles:
	legobj.set_linewidth(leg_linewidth)
	ax.set_xlabel(xlabel, fontsize=30)
	if subplot_nb == 0:
	ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
	ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
	ax.set_ylim(bottom=ylim[0],top=ylim[1])
	if title:
	ax.set_title(title, fontsize=22)


	# Metric plot
	metric = 'bin_extrinsic_return_mean'
	# metric = 'mission_string_observed_mean'
	# metric = 'extrinsic_return_mean'
	# metric = 'extrinsic_return_max'
	# metric = "rreturn_mean"
	# metric = 'rreturn_max'
	# metric = 'FPS'

	f, ax = plt.subplots(1, 1, figsize=(10.0, 6.0))
	ax = [ax]
	max_y = -np.inf
	min_y = np.inf
	# hardcoded
	min_y, max_y = 0.0, 1.0
	max_steps = 0
	exclude_patterns = []
	include_patterns = []


	def label_parser(label, figure_id, label_parser_dict=None):
	if label_parser_dict:
	if sum([1 for k, v in label_parser_dict.items() if k in label]) != 1:
	if label in label_parser_dict:
	# see if there is an exact match
	return label_parser_dict[label]
	else:
	print("ERROR multiple curves match a lable and there is no exact match")
	print(label)
	exit()

	for k, v in label_parser_dict.items():
	if k in label: return v

	else:
	# return label.split("_env_")[1]
	if figure_id not in [1,2,3,4]:
	return label
	else:
	label_parser_dict = {
	"RERUN_WizardGuide_lang64_no_explo": "MH-BabyAI",
	"RERUN_WizardTwoGuides_lang64_no_explo": "MH-BabyAI",

	"RERUN_WizardGuide_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
	"RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",

	"RERUN_WizardGuide_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
	"RERUN_WizardTwoGuides_lang64_deaf_no_explo": "Deaf-MH-BabyAI",

	"RERUN_WizardGuide_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
	"RERUN_WizardTwoGuides_lang64_bow": "MH-BabyAI-ExpBonus-BOW",

	"RERUN_WizardGuide_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
	"RERUN_WizardTwoGuides_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",

	"RERUN_WizardGuide_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
	"RERUN_WizardTwoGuides_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",

	"RERUN_WizardGuide_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
	"RERUN_WizardTwoGuides_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",

	"RERUN_WizardGuide_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
	"RERUN_WizardTwoGuides_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",

	"RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_100M": "MH-BabyAI-ExpBonus-100M"
	}
	if sum([1 for k, v in label_parser_dict.items() if k in label]) != 1:
	print("ERROR multiple curves match a lable")
	print(label)
	exit()

	for k, v in label_parser_dict.items():
	if k in label: return v

	return label

	per_seed=False

	for i, m_id in enumerate(models_saves.keys()):
	#excluding some experiments
	if any([ex_pat in m_id for ex_pat in exclude_patterns]):
	continue
	if len(include_patterns) > 0:
	if not any([in_pat in m_id for in_pat in include_patterns]):
	continue
	runs_data = models_saves[m_id]['data']
	ys = []

	# DIRTY FIX FOR FAULTY LOGGING
	print("m_id:", m_id)
	if runs_data[0]['frames'][1] == 'frames':
	runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))
	###########################################


	# determine minimal run length across seeds
	minimum = sorted([len(run['frames']) for run in runs_data if len(run['frames'])])[-top_n]
	min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])

	# min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) > 10])


	print("min_len:", min_len)

	#compute env steps (x axis)
	longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
	steps = np.array(runs_data[longest_id]['frames'], dtype=np.int) / 1000000
	steps = steps[:min_len]
	for run in runs_data:
	data = run[metric]
	# DIRTY FIX FOR FAULTY LOGGING (headers in data)
	if data[1] == metric:
	data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)
	###########################################
	if len(data) >= min_len:
	if len(data) > min_len:
	print("run has too many {} datapoints ({}). Discarding {}".format(m_id, len(data),
	len(data)-min_len))
	data = data[0:min_len]
	ys.append(data)
	ys_same_len = ys # RUNS MUST HAVE SAME LEN

	# computes stats
	n_seeds = len(ys_same_len)
	sems = np.std(ys_same_len,axis=0)/np.sqrt(len(ys_same_len)) # sem
	stds = np.std(ys_same_len,axis=0) # std
	means = np.mean(ys_same_len,axis=0)
	color = default_colors[i]

	# per-metric adjusments
	ylabel=metric
	if metric == 'bin_extrinsic_return_mean':
	ylabel = "success rate"
	if metric == 'duration':
	ylabel = "time (hours)"
	means = means / 3600
	sems = sems / 3600
	stds = stds / 3600

	#plot x y bounds
	curr_max_y = np.max(means)
	curr_min_y = np.min(means)
	curr_max_steps = np.max(steps)
	if curr_max_y > max_y:
	max_y = curr_max_y
	if curr_min_y < min_y:
	min_y = curr_min_y
	if curr_max_steps > max_steps:
	max_steps = curr_max_steps

	if subsample_step:
	steps = steps[0::subsample_step]
	means = means[0::subsample_step]
	stds = stds[0::subsample_step]
	sems = sems[0::subsample_step]
	ys_same_len = [y[0::subsample_step] for y in ys_same_len]

	# display seeds separtely
	if per_seed:
	for s_i, seed_ys in enumerate(ys_same_len):
	seed_c = default_colors[i+s_i]
	label = m_id#+"(s:{})".format(s_i)
	plot_with_shade(0, ax[0], steps, seed_ys, stds*0, seed_c, seed_c, label,
	legend=False, xlim=[0, max_steps], ylim=[min_y, max_y],
	leg_size=leg_size, xlabel="env steps (millions)", ylabel=ylabel, smooth_factor=smooth_factor,
	)
	else:
	label = label_parser(m_id, figure_id, label_parser_dict=label_parser_dict)
	label = label #+"({})".format(n_seeds)


	def smooth(x_, n=50):
	if type(x_) == list:
	x_ = np.array(x_)
	return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])
	if smooth_factor:
	means = smooth(means,smooth_factor)
	stds = smooth(stds,smooth_factor)
	x_lim = 30
	if figure_id == "TalkItOutPolite":
	leg_args = {
	'ncol': 1,
	'columnspacing': 1.0,
	'handlelength': 1.0,
	'frameon': False,
	# 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
	'bbox_to_anchor': (0.55, 0.35, 0.10, .102),
	'labelspacing': 0.2,
	'fontsize': 27
	}
	elif figure_id == "Help":
	leg_args = {
	'ncol': 1,
	'columnspacing': 1.0,
	'handlelength': 1.0,
	'frameon': False,
	# 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
	'bbox_to_anchor': (0.39, 0.20, 0.10, .102),
	'labelspacing': 0.2,
	'fontsize': 27
	}
	else:
	leg_args = {}

	color_code = dict([
	('PPO+Explo', 'indianred'),
	('PPO', "#1f77b4"),
	('Unsocial PPO', "grey"),
	('PPO (no liar)', "#043252"),
	('PPO+Explo (no liar)', "darkred"),
	('Unsocial PPO (no liar)', "black"),
	('PPO+Explo (helper)', 'indianred'),
	('PPO (helper)', "#1f77b4"),
	('Unsocial PPO (helper)', "grey")]
	)
	color = color_code.get(label, np.random.choice(default_colors))
	print("C:",color)
	plot_with_shade_grg(
	0, ax[0], steps, means, stds, color, color, label,
	legend=True,
	xlim=[0, steps[-1] if not x_lim else x_lim],
	ylim=[0, 1.0], xlabel="env steps (millions)", ylabel=ylabel, title=None,
	leg_args =leg_args)
	#
	# plot_with_shade(0, ax[0], steps, means, stds, color, color,label,
	# legend=True, xlim=[0, max_steps], ylim=[min_y, max_y],
	# leg_size=leg_size, xlabel="Env steps (millions)", ylabel=ylabel, linewidth=5.0, smooth_factor=smooth_factor)


	for label, (mean, std, color) in static_lines.items():
	plot_with_shade_grg(
	0, ax[0], steps, np.array([mean]len(steps)), np.array([std]len(steps)), color, color, label,
	legend=True,
	xlim=[0, max_steps],
	ylim=[0, 1.0],
	xlabel="env steps (millions)", ylabel=ylabel, linestyle=":",
	leg_args=leg_args)

	plt.tight_layout()
	f.savefig('graphics/{}_results.svg'.format(str(figure_id)))
	f.savefig('graphics/{}_results.png'.format(str(figure_id)))
	plt.show()