# Author: Federico Galatolo
# Status: work in progress
# Commit: bc679dd
import os
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotszoo
def get_hyperparameters(data_augmentation, sampler):
    """Return the wandb config keys of the hyperparameters that were tuned.

    The base set is always included; extra keys are added depending on the
    data-augmentation mode and on the sampler in use.  Every name is
    prefixed with ``config/`` to match the wandb scalars layout.
    """
    names = ["lr", "rpn_loss_weight", "roi_heads_loss_weight", "rois_per_image"]
    if data_augmentation == "full":
        names += ["random_brightness", "random_contrast"]
    # Both "full" and "crop-flip" augmentation tune the random crop.
    if data_augmentation in ("full", "crop-flip"):
        names.append("random_crop")
    if sampler == "RepeatFactorTrainingSampler":
        names.append("repeat_factor_th")
    return ["config/%s" % name for name in names]
def plot_study():
    """Plot the 2x2 study barcharts (test/train x detection/classification).

    Pulls scalars from wandb for the study tags, draws one grouped barchart
    per dataset/metric pair, exports each chart's dataframe to an xlsx file
    under ``study/`` and saves the composite figure as ``study.png``.
    Reads the module-level ``args`` namespace.
    """
    query = {"$or": [{"config.wandb_tag": {"$eq": tag}} for tag in args.tags_study_replicas]}
    data = plotszoo.data.WandbData(args.username, args.project, query, verbose=args.verbose)
    data.pull_scalars(force_update=args.update_scalars)

    group_keys = ["config/sampler", "config/data_augmentation"]

    def yticks_fn(index):
        return "Sampler: %s Data Augmentation: %s" % (index[0], index[1])

    fig, axes = plt.subplots(nrows=2, ncols=2)
    dataframes = {}
    # Rows: test/train; columns: detection/classification.
    for row, dataset in enumerate(("test", "train")):
        for col, metric in enumerate(("detection", "classification")):
            chart = plotszoo.scalars.grouped.GroupedScalarsBarchart(
                data, group_keys, "summary/%s/results/%s_accuracy" % (dataset, metric)
            )
            title = "%s %s Accuracy" % (dataset.capitalize(), metric.capitalize())
            dataframes[(dataset, metric)] = chart.plot(
                axes[row][col], title=title, nbins=20, grid=True, yticks_fn=yticks_fn
            )
    for (dataset, metric), frame in dataframes.items():
        frame.to_excel(os.path.join(args.output_directory, "study/%s_%s.xlsx" % (dataset, metric)))
    for ax in axes.flatten():
        ax.set_xlim(xmin=0.5)
    fig.set_size_inches(30, 10)
    fig.tight_layout()
    plotszoo.utils.savefig(fig, os.path.join(args.output_directory, "study.png"))
def plot_optimization_history(ax, data, dataset):
    """Draw the per-run and running-best accuracies for ``dataset`` on ``ax``.

    Iterates ``data.scalars`` in row order: solid/dashed lines trace the best
    accuracy (and the detection/classification accuracies of that same best
    run) seen so far, while scatter points show every run's raw values.
    """
    prefix = "summary/" + dataset + "/results/"
    metrics = ("accuracy", "detection_accuracy", "classification_accuracy")
    best = {metric: float("-inf") for metric in metrics}
    raw = {metric: [] for metric in metrics}
    running_best = {metric: [] for metric in metrics}
    indices = []
    for idx, row in data.scalars.iterrows():
        # A new overall-accuracy record snapshots all three metrics of that run,
        # so the dashed lines follow the best *run*, not per-metric maxima.
        if row[prefix + "accuracy"] > best["accuracy"]:
            best = {metric: row[prefix + metric] for metric in metrics}
        for metric in metrics:
            raw[metric].append(row[prefix + metric])
            running_best[metric].append(best[metric])
        indices.append(idx)
    ax.plot(indices, running_best["accuracy"], "k", label="Best "+dataset+" Accuracy")
    ax.plot(indices, running_best["detection_accuracy"], "b--", label="Best "+dataset+" Detection Accuracy")
    ax.plot(indices, running_best["classification_accuracy"], "g--", label="Best "+dataset+" Classification Accuracy")
    ax.scatter(indices, raw["accuracy"], c="k", alpha=0.5)
    ax.scatter(indices, raw["detection_accuracy"], c="b", alpha=0.5)
    ax.scatter(indices, raw["classification_accuracy"], c="g", alpha=0.5)
    ax.legend(loc="lower right")
def plot_optimization():
    """For each hyperparameter-optimization tag, plot the parallel-coordinates
    chart and the optimization history, then print and save the best run's
    hyperparameters as a reusable command line.

    Reads the module-level ``args`` namespace; writes figures and a
    ``best_args.txt`` under ``args.output_directory/<tag>/``.
    """
    for tag, params in args.tags_optimization.items():
        query = {"config.wandb_tag": {"$eq": tag}}
        parameters = get_hyperparameters(**params)
        parameters.extend(["summary/train/results/detection_accuracy", "summary/train/results/classification_accuracy"])
        data = plotszoo.data.WandbData(args.username, args.project, query, verbose=args.verbose)
        data.pull_scalars(force_update=args.update_scalars)
        assert len(data.scalars) > 0, "No data, check the tag name"
        data.pull_series(force_update=args.update_series)
        data.astype(["summary/train/results/accuracy", "summary/train/results/detection_accuracy", "summary/train/results/classification_accuracy"], float)
        data.dropna(["summary/train/results/accuracy"])
        data.create_scalar_from_series("start_time", lambda s: s["_timestamp"].min())
        # Parallel coordinates over hyperparameters + train accuracies.
        fig, axes = plt.subplots(1, len(parameters), sharey=False)
        parallel_plot = plotszoo.scalars.ScalarsParallelCoordinates(data, parameters, "summary/train/results/accuracy")
        parallel_plot.plot(axes)
        fig.set_size_inches(32, 10)
        plotszoo.utils.savefig(fig, os.path.join(args.output_directory, tag, "optim_parallel.png"))
        # Running-best optimization history for train and test.
        fig, ax = plt.subplots(2, 1)
        plot_optimization_history(ax[0], data, "train")
        plot_optimization_history(ax[1], data, "test")
        fig.set_size_inches(20, 10)
        plotszoo.utils.savefig(fig, os.path.join(args.output_directory, tag, "optim_history.png"))
        # Drop the accuracy columns again: only real hyperparameters become CLI args.
        parameters.remove("summary/train/results/detection_accuracy")
        parameters.remove("summary/train/results/classification_accuracy")
        args_names = [p.split("/")[1].replace("_", "-") for p in parameters]
        best_run = data.scalars["summary/train/results/accuracy"].idxmax()
        best_args = "".join(["--%s %s " % (n, data.scalars[k][best_run]) for n, k in zip(args_names, parameters)])
        best_args += "".join(["--%s %s " % (k.replace("_", "-"), v) for k, v in params.items()])
        print(best_run)
        print("Tag: %s" % tag)
        print(data.scalars.loc[best_run][["summary/train/results/detection_accuracy", "summary/train/results/classification_accuracy"]])
        print("HP: %s" % best_args)
        print()
        # Ensure the tag directory exists and close the file deterministically
        # (the original leaked the handle on exceptions).
        os.makedirs(os.path.join(args.output_directory, tag), exist_ok=True)
        with open(os.path.join(args.output_directory, tag, "best_args.txt"), "w") as best_args_f:
            best_args_f.write(best_args)
def plot_replicas():
    """Plot test detection/classification barcharts for the best-replica runs.

    Pulls scalars for the best-replica tags, groups by sampler, exports the
    two dataframes as xlsx files under ``result/``, prints them and saves the
    composite figure as ``results.png``.  Reads the module-level ``args``.
    """
    query = {"$or": [{"config.wandb_tag": {"$eq": t}} for t in args.tags_best_replicas]}
    data = plotszoo.data.WandbData(args.username, args.project, query, verbose=args.verbose)
    data.pull_scalars(force_update=args.update_scalars)

    def yticks_fn(index):
        return "Sampler: %s" % (index, )

    fig, axes = plt.subplots(nrows=2, ncols=1)
    frames = {}
    # Top axis: detection, bottom axis: classification.
    for ax, metric in zip(axes, ("detection", "classification")):
        chart = plotszoo.scalars.grouped.GroupedScalarsBarchart(
            data, ["config/sampler"], "summary/test/results/%s_accuracy" % metric
        )
        frames[metric] = chart.plot(
            ax, title="Test %s Accuracy" % metric.capitalize(), nbins=20, grid=True, yticks_fn=yticks_fn
        )
        ax.set_xlim(xmin=0.5)
    fig.set_size_inches(20, 10)
    fig.tight_layout()
    frames["classification"].to_excel(os.path.join(args.output_directory, "result/classification.xlsx"))
    frames["detection"].to_excel(os.path.join(args.output_directory, "result/detection.xlsx"))
    print(frames["classification"])
    print(frames["detection"])
    plotszoo.utils.savefig(fig, os.path.join(args.output_directory, "results.png"))
def plot_tables():
    """Print and save per-sampler classification-report tables.

    Averages precision/recall/f1 per class over the best-replica runs,
    grouped by sampler, writing one ``<sampler>_table.csv`` per group.
    Reads the module-level ``args`` namespace.
    """
    query = {"$or": [{"config.wandb_tag": {"$eq": tag}} for tag in args.tags_best_replicas]}
    data = plotszoo.data.WandbData(args.username, args.project, query, verbose=args.verbose)
    data.pull_scalars(force_update=args.update_scalars)
    group_keys = ["config/sampler"]
    classes = ["neoplastic", "aphthous", "traumatic"]
    metrics = ["precision", "recall", "f1-score"]
    # Average only the report columns we actually use: `.agg(np.mean)` over the
    # whole frame is deprecated and raises on non-numeric (config) columns in
    # recent pandas versions.
    report_columns = ["summary/test/report/%s/%s" % (c, m) for c in classes for m in metrics]
    grouped_df = data.scalars.groupby(group_keys)[report_columns].mean()
    for group in grouped_df.index:
        data_df = grouped_df.loc[group]
        table = np.zeros((len(classes), len(metrics)))
        for i, c in enumerate(classes):
            for j, m in enumerate(metrics):
                # Report values are fractions; tables are shown as percentages.
                table[i, j] = data_df["summary/test/report/%s/%s" % (c, m)] * 100
        table_df = pd.DataFrame(table, columns=metrics, index=classes)
        table_df.to_csv(os.path.join(args.output_directory, "%s_table.csv" % (group)))
        print("Sampler: %s" % (group))
        print(table_df)
        print()
# Command-line configuration for the plotting script.
parser = argparse.ArgumentParser()
parser.add_argument("--output-directory", type=str, default="./plots")  # root directory for all generated figures/tables
parser.add_argument("--username", type=str, default="mlpi")  # wandb entity
parser.add_argument("--project", type=str, default="oral-ai")  # wandb project
parser.add_argument("--tags-study-replicas", type=str, default=["study-3"], nargs="+")  # wandb tags of the study runs
# NOTE(review): type=dict combined with nargs="+" cannot parse values supplied
# on the command line (dict("...") raises TypeError); in practice only the
# default mapping below is usable — confirm whether CLI override is intended.
parser.add_argument("--tags-optimization", type=dict, default={
"hp-optimization-none-trainingsampler-5": dict(
data_augmentation="none",
sampler="TrainingSampler"
),
"hp-optimization-none-repeatfactortrainingsampler-5": dict(
data_augmentation="none",
sampler="RepeatFactorTrainingSampler"
)
}, nargs="+")
parser.add_argument("--tags-best-replicas", type=str, default=["best-replicas-7"], nargs="+")  # wandb tags of the best-replica runs
parser.add_argument("--update-scalars", action="store_true")  # force re-download of wandb scalars
parser.add_argument("--update-series", action="store_true")  # force re-download of wandb series
parser.add_argument("--verbose", action="store_true")
args = parser.parse_args()
# Generate all figures and tables (requires wandb access through plotszoo).
plot_study()
plot_optimization()
plot_replicas()
#plot_tables()