SocialAISchool / data_analysis_neurips.py
grg's picture
Cleaned old git history
be5548b
raw
history blame
22.4 kB
#!/usr/bin/env python
import json
import os
import sys
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from termcolor import cprint
# Load data
# Module-level settings shared by the loading and plotting code below.
exp_idx = 0  # incremented once per run found by get_all_runs()
label_parser_dict = None  # optional {substring: legend label} table, set per figure
smooth_factor = 10  # window passed to the smoothing helper; falsy disables smoothing
leg_size = 30
subsample_step = 1  # stride applied to the curves right before plotting
load_subsample_step = 50  # stride applied to the csv rows at load time

# Named colours first, then 'y' as filler so that indexing by curve number
# stays valid for a large number of curves.
_named_colors = [
    "blue", "orange", "green", "magenta", "brown", "red", "black", "grey",
    "#ff7f0e", "cyan", "pink", "purple", "#1f77b4", "darkorchid", "sienna",
    "lightpink", "indigo", "mediumseagreen", "aqua", "deeppink", "silver",
    "khaki", "goldenrod",
]
default_colors = _named_colors + ["y"] * 62
def get_all_runs(logdir, load_subsample_step=1):
    """
    Recursively look through logdir for runs and load them.

    Every directory below ``logdir`` that contains a file named "log.csv"
    is treated as one run. If a "config.json" sits next to it, its parsed
    content is attached to the run; otherwise the config is ``None``.

    Args:
        logdir: directory to walk.
        load_subsample_step: keep one csv row out of this many.

    Returns:
        A list with one dict per run, mapping every csv column name to the
        list of its (subsampled) values, plus a 'config' entry.

    Raises:
        ValueError: if a log.csv cannot be parsed.
        KeyError: if a log.csv has no 'frames' column.
    """
    global exp_idx
    datasets = []
    # Run names are printed relative to the parent of logdir, i.e. without
    # the leading storage directory, whatever that directory is called.
    name_root = os.path.dirname(os.path.normpath(logdir)) or os.curdir

    for root, _, files in os.walk(logdir):
        if 'log.csv' not in files:
            continue
        run_name = os.path.relpath(root, name_root)

        # try to load a config file containing hyperparameters
        config = None
        config_file = os.path.join(root, 'config.json')
        try:
            with open(config_file) as config_handle:
                config = json.load(config_handle)
        except FileNotFoundError:
            print('No file named config.json')
        except (OSError, ValueError) as err:
            # unreadable or malformed json: keep going without a config
            print('Could not read {}: {}'.format(config_file, err))

        exp_idx += 1

        # load progress data
        csv_file = os.path.join(root, 'log.csv')
        print(csv_file)
        try:
            exp_data = pd.read_csv(csv_file)
        except Exception as err:
            raise ValueError("CSV {} faulty".format(csv_file)) from err

        exp_data = exp_data[::load_subsample_step]
        data_dict = exp_data.to_dict("list")
        data_dict['config'] = config

        nb_epochs = len(data_dict['frames'])
        print('{} -> {}'.format(run_name, nb_epochs))
        datasets.append(data_dict)

    return datasets
def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_pattern="ignore"):
    """
    Load every experiment directory of ``rootdir`` that matches the filters.

    Only the immediate sub-directories of ``rootdir`` are considered. A
    directory is loaded when its name contains ``load_only`` and does not
    contain ``ignore_pattern``. Results are stored in the module-level
    registries: ``models_saves[name]['data']`` receives the runs returned by
    get_all_runs(), ``labels[name]`` defaults to the directory name, and
    ``colors[name]`` is set when a key of ``per_model_colors`` occurs in the
    name.

    Args:
        rootdir: storage directory (with or without a trailing separator).
        load_only: substring a directory name must contain to be loaded.
        load_subsample_step: forwarded to get_all_runs().
        ignore_pattern: substring that excludes a directory.

    Raises:
        FileNotFoundError: if ``rootdir`` cannot be walked.
    """
    try:
        _, models_list, _ = next(os.walk(rootdir))
    except StopIteration:
        # os.walk yields nothing for a missing/unreadable directory
        raise FileNotFoundError("cannot list directory {!r}".format(rootdir)) from None
    print(models_list)

    # add "ignore" in a directory name to avoid loading its content
    models_list = [
        dir_name for dir_name in models_list
        if ignore_pattern not in dir_name and load_only in dir_name
    ]

    # NOTE(review): this also drops labels registered by earlier calls for
    # directories that the current filters do not select - confirm intended.
    for expe_name in list(labels.keys()):
        if expe_name not in models_list:
            del labels[expe_name]

    for m_name in models_list:
        # setting per-model type colors
        for m_type, m_color in per_model_colors.items():
            if m_type in m_name:
                colors[m_name] = m_color

        print("extracting data for {}...".format(m_name))
        models_saves[m_name] = OrderedDict()
        models_saves[m_name]['data'] = get_all_runs(
            os.path.join(rootdir, m_name), load_subsample_step=load_subsample_step)
        print("done")

        if m_name not in labels:
            labels[m_name] = m_name
"""
retrieve all experiences located in "data to vizu" folder
"""
# Registries filled by get_datasets() and read by the plotting code.
labels = OrderedDict()  # experiment directory name -> legend label
# substring of a directory name -> colour; when non-empty it also drives
# the legend order further down
per_model_colors = OrderedDict()
# per_model_colors = OrderedDict([('ALP-GMM',u'#1f77b4'),
#                                 ('hmn','pink'),
#                                 ('ADR','black')])

# LOAD DATA
models_saves = OrderedDict()  # experiment directory name -> {'data': [runs]}
colors = OrderedDict()  # experiment directory name -> colour
static_lines = {}  # legend label -> (mean, std, colour) horizontal reference lines
# get_datasets("storage/",load_only="RERUN_WizardGuide")
# get_datasets("storage/",load_only="RERUN_WizardTwoGuides")


def parse_figure_id(raw):
    """Turn the command-line figure id into an int when it is a plain number.

    Anything else is returned unchanged as a string. The argument is never
    evaluated as code, so ids such as "24-05" or "exit" stay strings.
    """
    return int(raw) if raw.isdecimal() else raw


# The figure id is the first command-line argument (IndexError if missing).
figure_id = parse_figure_id(sys.argv[1])
print("fig:", figure_id)
def _load_from_storage(*load_patterns, **filters):
    """Call get_datasets() on "storage/" once per pattern, in the given order."""
    for load_pattern in load_patterns:
        get_datasets("storage/", load_pattern, load_subsample_step=load_subsample_step, **filters)


# Pick the experiments to load for the requested figure. Every branch sets
# env_type, fig_type and top_n; some also set label_parser_dict / static_lines.
if figure_id == 0:
    # train change
    env_type, fig_type = "No_NPC_environment", "train"
    _load_from_storage(
        "RERUN_WizardGuide_lang64_mm",
        "RERUN_WizardGuide_lang64_deaf_no_explo",
        "RERUN_WizardGuide_lang64_no_explo",
        "RERUN_WizardGuide_lang64_curr_dial",
    )
    top_n = 16

elif figure_id == 1:
    # arch change
    env_type, fig_type = "No_NPC_environment", "arch"
    _load_from_storage(
        "RERUN_WizardGuide_lang64_mm",
        "RERUN_WizardGuide_lang64_bow",
        "RERUN_WizardGuide_lang64_no_mem",
        "RERUN_WizardGuide_lang64_bigru",
        "RERUN_WizardGuide_lang64_attgru",
    )
    top_n = 16

elif figure_id == 2:
    # train change FULL
    env_type, fig_type = "FULL_environment", "train"
    _load_from_storage(
        "RERUN_WizardTwoGuides_lang64_mm",
        "RERUN_WizardTwoGuides_lang64_deaf_no_explo",
        "RERUN_WizardTwoGuides_lang64_no_explo",
        "RERUN_WizardTwoGuides_lang64_curr_dial",
    )
    top_n = 16

elif figure_id == 3:
    # arch change FULL
    env_type, fig_type = "FULL_environment", "arch"
    _load_from_storage(
        "RERUN_WizardTwoGuides_lang64_mm",
        "RERUN_WizardTwoGuides_lang64_bow",
        "RERUN_WizardTwoGuides_lang64_no_mem",
        "RERUN_WizardTwoGuides_lang64_bigru",
        "RERUN_WizardTwoGuides_lang64_attgru",
    )
    top_n = 16

elif str(figure_id) == "ShowMe":
    _load_from_storage("20-05_NeurIPS_ShowMe_ABL_CEB", ignore_pattern="tanh_0.3")
    _load_from_storage("20-05_NeurIPS_ShowMe_NO_BONUS_ABL")
    _load_from_storage("20-05_NeurIPS_ShowMe_CEB", ignore_pattern="tanh_0.3")
    _load_from_storage("20-05_NeurIPS_ShowMe_NO_BONUS_env")

    label_parser_dict = {
        "20-05_NeurIPS_ShowMe_ABL_CEB": "ShowMe_exp_bonus_no_social_skills_required",
        "20-05_NeurIPS_ShowMe_NO_BONUS_ABL": "ShowMe_no_bonus_no_social_skills_required",
        "20-05_NeurIPS_ShowMe_CEB": "ShowMe_exp_bonus",
        "20-05_NeurIPS_ShowMe_NO_BONUS_env": "ShowMe_no_bonus",
    }
    env_type, fig_type, top_n = str(figure_id), "test", 16

elif str(figure_id) == "Help":
    # disabled alternative: env_type = "Bobo" with get_datasets("storage/", "Bobo")
    _load_from_storage("24-05_NeurIPS_Help", ignore_pattern="ABL")
    # disabled: "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL"
    _load_from_storage("26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env")

    label_parser_dict = {
        "Help_NO_BONUS_env": "PPO",
        "Help_BONUS_env": "PPO+Explo",
        # "Help_NO_BONUS_ABL_env": "ExiterRole_no_bonus_no_NPC",
        # "Help_BONUS_ABL_env": "ExiterRole_bonus_no_NPC",
        "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env": "Unsocial PPO",
        # "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_ABL": "ExiterRole_Insocial_ABL"
    }
    # legend label -> (mean, std, colour), drawn as dotted horizontal lines
    static_lines = {
        "PPO (helper)": (0.12, 0.05, "#1f77b4"),
        "PPO+Explo (helper)": (0.11, 0.04, "indianred"),
        # "Help_exp_bonus": (0.11525, 0.04916 , default_colors[2]),
        # "HelperRole_ABL_no_exp_bonus": (0.022375, 0.01848, default_colors[3]),
        "Unsocial PPO (helper)": (0.15, 0.06, "grey"),
        # "HelperRole_ABL_Insocial": (0.01775, 0.010544, default_colors[4]),
    }
    env_type, fig_type, top_n = str(figure_id), "test", 16

elif str(figure_id) == "TalkItOut":
    print("You mean Polite")
    exit()

elif str(figure_id) == "TalkItOutPolite":
    # disabled alternatives: "ORIENT_env_MiniGrid-TalkItOut", "GuideThief", "Bobo",
    # "21-05_NeurIPS_small_bonus_TalkItOutPolite"
    _load_from_storage(
        "20-05_NeurIPS_TalkItOutPolite",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar",
    )

    label_parser_dict = {
        "TalkItOutPolite_NO_BONUS_env": "PPO",
        "TalkItOutPolite_e": "PPO+Explo",
        "TalkItOutPolite_NO_BONUS_NoLiar": "PPO (no liar)",
        "TalkItOutPolite_NoLiar_e": "PPO+Explo (no liar)",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env": "Unsocial PPO",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar": "Unsocial PPO (no liar)",
    }
    env_type, fig_type, top_n = str(figure_id), "test", 16

elif str(figure_id) == "DiverseExit":
    _load_from_storage(
        "24-05_NeurIPS_DiverseExit",
        "26-05_NeurIPS_gpu_DiverseExit",
    )

    label_parser_dict = {
        "DiverseExit_NO_BONUS": "No_bonus",
        "DiverseExit_BONUS": "BOnus",
        "gpu_DiverseExit_NoSocial": "No_social",
    }
    env_type, fig_type, top_n = str(figure_id), "test", 16

else:
    # any other id is used directly as the directory-name filter
    _load_from_storage(str(figure_id))
    env_type, fig_type, top_n = str(figure_id), "test", 8
#### get_datasets("storage/", "RERUN_WizardGuide_lang64_nameless")
#### get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_nameless")
if per_model_colors:
    # Rebuild the label table so the legend follows the key order of
    # per_model_colors; labels matching none of its keys are dropped.
    labels = OrderedDict(
        (run_name, run_label)
        for model_type in per_model_colors
        for run_name, run_label in labels.items()
        if model_type in run_name
    )
else:
    print('not using per_model_color')
    # every loaded experiment is labelled with its own directory name
    labels.update((run_name, run_name) for run_name in models_saves)
def plot_with_shade(subplot_nb, ax, x, y, err, color, shade_color, label,
                    y_min=None, y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
                    ylim=(0, 100), xlim=(0, 40), leg_args=None, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
                    zorder=None, xlabel='perf', ylabel='env steps'):
    """
    Draw one curve with a shaded band of +/- ``err`` around it on ``ax``.

    Args:
        subplot_nb: index of the subplot; the y label is only set for 0.
        ax: matplotlib axes to draw on.
        x, y, err: sequences of equal length (lists or arrays).
        color, shade_color: colour of the line and of the band.
        label: legend entry of the curve.
        y_min, y_max, leg_size: accepted for compatibility, not used.
        legend: when true, (re)draw the legend with thick handles.
        leg_loc, leg_args: location and extra keyword arguments of the legend.
        ylim, xlim: (low, high) axis limits.
        leg_linewidth, linewidth, ticksize, zorder, title: cosmetic settings.
        xlabel, ylabel: axis labels. NOTE(review): the defaults look swapped
            with respect to how the script uses the axes - confirm.
    """
    # accept plain lists: the band needs element-wise arithmetic
    y = np.asarray(y)
    err = np.asarray(err)

    #plt.rcParams.update({'font.size': 15})
    ax.locator_params(axis='x', nbins=4)
    ax.locator_params(axis='y', nbins=3)
    ax.tick_params(axis='both', which='major', labelsize=ticksize)
    ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder)
    ax.fill_between(x, y - err, y + err, color=shade_color, alpha=0.2)

    if legend:
        leg = ax.legend(loc=leg_loc, **(leg_args or {}))  # 34
        # `legendHandles` was renamed `legend_handles` in Matplotlib 3.7
        # and the old name removed later; support both.
        handles = getattr(leg, "legend_handles", None)
        if handles is None:
            handles = leg.legendHandles
        for legobj in handles:
            legobj.set_linewidth(leg_linewidth)

    ax.set_xlabel(xlabel, fontsize=30)
    if subplot_nb == 0:
        ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
    ax.set_xlim(xlim[0], xlim[1])
    ax.set_ylim(bottom=ylim[0], top=ylim[1])
    if title:
        ax.set_title(title, fontsize=22)
# Plot utils
def plot_with_shade_grg(subplot_nb, ax, x, y, err, color, shade_color, label,
                        y_min=None, y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
                        ylim=(0, 100), xlim=(0, 40), leg_args=None, leg_linewidth=13.0, linewidth=10.0, ticksize=20,
                        zorder=None, xlabel='perf', ylabel='env steps', linestyle="-"):
    """
    Draw one curve with a shaded band of +/- ``err`` around it on ``ax``.

    Same as plot_with_shade() with an additional ``linestyle`` argument.

    Args:
        subplot_nb: index of the subplot; the y label is only set for 0.
        ax: matplotlib axes to draw on.
        x, y, err: sequences of equal length (lists or arrays).
        color, shade_color: colour of the line and of the band.
        label: legend entry of the curve.
        y_min, y_max, leg_size: accepted for compatibility, not used.
        legend: when true, (re)draw the legend with thick handles.
        leg_loc, leg_args: location and extra keyword arguments of the legend.
        ylim, xlim: (low, high) axis limits.
        leg_linewidth, linewidth, ticksize, zorder, title: cosmetic settings.
        xlabel, ylabel: axis labels. NOTE(review): the defaults look swapped
            with respect to how the script uses the axes - confirm.
        linestyle: matplotlib line style of the curve.
    """
    # accept plain lists: the band needs element-wise arithmetic
    y = np.asarray(y)
    err = np.asarray(err)

    #plt.rcParams.update({'font.size': 15})
    ax.locator_params(axis='x', nbins=4)
    ax.locator_params(axis='y', nbins=3)
    ax.tick_params(axis='both', which='major', labelsize=ticksize)
    ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder, linestyle=linestyle)
    ax.fill_between(x, y - err, y + err, color=shade_color, alpha=0.2)

    if legend:
        leg = ax.legend(loc=leg_loc, **(leg_args or {}))  # 34
        # `legendHandles` was renamed `legend_handles` in Matplotlib 3.7
        # and the old name removed later; support both.
        handles = getattr(leg, "legend_handles", None)
        if handles is None:
            handles = leg.legendHandles
        for legobj in handles:
            legobj.set_linewidth(leg_linewidth)

    ax.set_xlabel(xlabel, fontsize=30)
    if subplot_nb == 0:
        ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
    ax.set_xlim(xlim[0], xlim[1])
    ax.set_ylim(bottom=ylim[0], top=ylim[1])
    if title:
        ax.set_title(title, fontsize=22)
# Metric plot
# Column of log.csv drawn on the y axis. Other columns tried previously:
#   'mission_string_observed_mean', 'extrinsic_return_mean',
#   'extrinsic_return_max', "rreturn_mean", 'rreturn_max', 'FPS'
metric = 'bin_extrinsic_return_mean'

# A single panel, wrapped in a list so that it can be addressed as ax[0].
f, _only_axes = plt.subplots(nrows=1, ncols=1, figsize=(10.0, 6.0))
ax = [_only_axes]

# Starting y range is hardcoded; the plotting loop may widen it.
min_y = 0.0
max_y = 1.0
max_steps = 0

# Substring filters on experiment names; both empty means "plot everything".
exclude_patterns = []
include_patterns = []
def label_parser(label, figure_id, label_parser_dict=None):
    """
    Translate an experiment directory name into its legend label.

    The lookup table maps substrings of directory names to legend labels.
    ``label_parser_dict`` is used when given; otherwise figures 0-4 use the
    built-in table of the RERUN experiments and every other figure returns
    ``label`` unchanged.

    A table entry is selected when it is the only key contained in
    ``label``. When several keys (or none) are contained, an entry whose
    key equals ``label`` exactly is used if there is one.

    Args:
        label: experiment directory name.
        figure_id: id of the figure being drawn.
        label_parser_dict: optional {substring: legend label} table.

    Returns:
        The legend label.

    Raises:
        SystemExit: with status 1 when the table cannot resolve ``label``.
    """
    if label_parser_dict:
        table = label_parser_dict
    elif figure_id in (0, 1, 2, 3, 4):
        # Figures 0-3 all load RERUN_* experiments; 4 is kept for
        # backward compatibility with the previous numbering.
        table = {
            "RERUN_WizardGuide_lang64_no_explo": "MH-BabyAI",
            "RERUN_WizardTwoGuides_lang64_no_explo": "MH-BabyAI",
            "RERUN_WizardGuide_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
            "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
            "RERUN_WizardGuide_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
            "RERUN_WizardTwoGuides_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
            "RERUN_WizardGuide_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
            "RERUN_WizardTwoGuides_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
            "RERUN_WizardGuide_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
            "RERUN_WizardTwoGuides_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
            "RERUN_WizardGuide_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
            "RERUN_WizardTwoGuides_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
            "RERUN_WizardGuide_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
            "RERUN_WizardTwoGuides_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
            "RERUN_WizardGuide_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
            "RERUN_WizardTwoGuides_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
            "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_100M": "MH-BabyAI-ExpBonus-100M",
        }
    else:
        # return label.split("_env_")[1]
        return label

    matches = [pretty for key, pretty in table.items() if key in label]
    if len(matches) == 1:
        return matches[0]

    # ambiguous or missing substring match: see if there is an exact match
    if label in table:
        return table[label]

    if matches:
        print("ERROR multiple curves match a label and there is no exact match")
    else:
        print("ERROR no curve matches the label")
    print(label)
    sys.exit(1)
per_seed = False  # True draws every seed as its own curve instead of the mean
x_lim = 30  # right bound of the x axis (millions of steps); falsy -> last step

# Legend placement for the figures that need a hand-tuned one.
_LEGEND_ARGS = {
    "TalkItOutPolite": {
        'ncol': 1,
        'columnspacing': 1.0,
        'handlelength': 1.0,
        'frameon': False,
        # 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
        'bbox_to_anchor': (0.55, 0.35, 0.10, .102),
        'labelspacing': 0.2,
        'fontsize': 27,
    },
    "Help": {
        'ncol': 1,
        'columnspacing': 1.0,
        'handlelength': 1.0,
        'frameon': False,
        # 'bbox_to_anchor': (0.00, 0.23, 0.10, .102),
        'bbox_to_anchor': (0.39, 0.20, 0.10, .102),
        'labelspacing': 0.2,
        'fontsize': 27,
    },
}

# Fixed colour per legend label.
_COLOR_CODE = {
    'PPO+Explo': 'indianred',
    'PPO': "#1f77b4",
    'Unsocial PPO': "grey",
    'PPO (no liar)': "#043252",
    'PPO+Explo (no liar)': "darkred",
    'Unsocial PPO (no liar)': "black",
    'PPO+Explo (helper)': 'indianred',
    'PPO (helper)': "#1f77b4",
    'Unsocial PPO (helper)': "grey",
}


def smooth(x_, n=50):
    """Trailing mean: element i becomes the mean of x_[max(i - n, 0):i + 1]."""
    x_ = np.asarray(x_)
    return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])


def _drop_repeated_header(values, header):
    """Return ``values`` without the cells that repeat the column header.

    Faulty logging sometimes writes the csv header again in the middle of
    a file; those rows show up as cells equal to the column name.
    """
    return [value for value in values if not (isinstance(value, str) and value == header)]


# Defined before the loop so it exists whichever branch draws the curves.
leg_args = dict(_LEGEND_ARGS.get(str(figure_id), {}))

for i, m_id in enumerate(models_saves.keys()):
    # excluding some experiments
    if any(ex_pat in m_id for ex_pat in exclude_patterns):
        continue
    if include_patterns and not any(in_pat in m_id for in_pat in include_patterns):
        continue

    runs_data = models_saves[m_id]['data']
    print("m_id:", m_id)

    # DIRTY FIX FOR FAULTY LOGGING (headers in data), applied to every run
    for run in runs_data:
        run['frames'] = _drop_repeated_header(run['frames'], 'frames')

    # Keep the top_n longest runs (all of them when there are fewer) and
    # cut every kept run to the length of the shortest of those.
    run_lengths = sorted(len(run['frames']) for run in runs_data if len(run['frames']))
    if not run_lengths:
        print("no usable run for {}, skipping".format(m_id))
        continue
    min_len = run_lengths[-min(top_n, len(run_lengths))]
    print("min_len:", min_len)

    # compute env steps (x axis), in millions, from the longest run
    longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
    # np.int was removed in NumPy 1.24; use an explicit integer dtype
    steps = np.array(runs_data[longest_id]['frames'], dtype=np.int64) / 1000000
    steps = steps[:min_len]

    ys = []
    for run in runs_data:
        data = run[metric]
        # DIRTY FIX FOR FAULTY LOGGING (headers in data): such a column is
        # read as text, so drop the header cells and convert back to floats.
        # float16 is avoided: it overflows above 65504 (e.g. 'duration').
        if any(isinstance(value, str) for value in data):
            data = np.array(_drop_repeated_header(data, metric), dtype=float)
        if len(data) >= min_len:
            if len(data) > min_len:
                print("run has too many {} datapoints ({}). Discarding {}".format(
                    m_id, len(data), len(data) - min_len))
                data = data[0:min_len]
            ys.append(data)
    if not ys:
        print("no run long enough for {}, skipping".format(m_id))
        continue
    ys_same_len = ys  # RUNS MUST HAVE SAME LEN

    # computes stats
    n_seeds = len(ys_same_len)
    stds = np.std(ys_same_len, axis=0)  # std
    sems = stds / np.sqrt(n_seeds)  # sem
    means = np.mean(ys_same_len, axis=0)

    # per-metric adjustments
    ylabel = metric
    if metric == 'bin_extrinsic_return_mean':
        ylabel = "success rate"
    if metric == 'duration':
        ylabel = "time (hours)"
        means = means / 3600
        sems = sems / 3600
        stds = stds / 3600

    # plot x y bounds
    max_y = max(max_y, np.max(means))
    min_y = min(min_y, np.min(means))
    max_steps = max(max_steps, np.max(steps))

    if subsample_step:
        steps = steps[0::subsample_step]
        means = means[0::subsample_step]
        stds = stds[0::subsample_step]
        sems = sems[0::subsample_step]
        ys_same_len = [y[0::subsample_step] for y in ys_same_len]

    if per_seed:
        # display seeds separately
        for s_i, seed_ys in enumerate(ys_same_len):
            seed_c = default_colors[(i + s_i) % len(default_colors)]
            label = m_id  # +"(s:{})".format(s_i)
            # plot_with_shade() has no smoothing argument, so none is passed
            plot_with_shade(
                0, ax[0], steps, np.asarray(seed_ys), stds * 0, seed_c, seed_c, label,
                legend=False, xlim=[0, max_steps], ylim=[min_y, max_y],
                leg_size=leg_size, xlabel="env steps (millions)", ylabel=ylabel)
    else:
        label = label_parser(m_id, figure_id, label_parser_dict=label_parser_dict)

        if smooth_factor:
            means = smooth(means, smooth_factor)
            stds = smooth(stds, smooth_factor)

        # Known labels get their fixed colour; anything else falls back to
        # the palette entry of this curve instead of a random pick.
        color = _COLOR_CODE.get(label, default_colors[i % len(default_colors)])
        print("C:", color)

        plot_with_shade_grg(
            0, ax[0], steps, means, stds, color, color, label,
            legend=True,
            xlim=[0, x_lim if x_lim else steps[-1]],
            ylim=[0, 1.0], xlabel="env steps (millions)", ylabel=ylabel, title=None,
            leg_args=leg_args)
# Horizontal reference lines: a constant mean with a constant +/- std band,
# drawn dotted over the x range of the last plotted experiment.
# NOTE(review): relies on `steps`, `ylabel` and `leg_args` left over from
# the plotting loop, and resets the x limit to max_steps - confirm intended.
for label, (mean, std, color) in static_lines.items():
    plot_with_shade_grg(
        0, ax[0], steps, np.array([mean]*len(steps)), np.array([std]*len(steps)), color, color, label,
        legend=True,
        xlim=[0, max_steps],
        ylim=[0, 1.0],
        xlabel="env steps (millions)", ylabel=ylabel, linestyle=":",
        leg_args=leg_args)

plt.tight_layout()

# savefig does not create missing directories
os.makedirs('graphics', exist_ok=True)
f.savefig('graphics/{}_results.svg'.format(str(figure_id)))
f.savefig('graphics/{}_results.png'.format(str(figure_id)))
plt.show()