import base64
import csv
import json
import os
import pickle
import sys
import time

import numpy as np
from scipy.special import softmax

# The parent directory is appended to the import path before the project-local
# imports below -- presumably so that `context`, `strategy` and `singleVis`
# resolve when this module is run from its own directory.
vis_path = ".."
sys.path.append(vis_path)

from context import VisContext, ActiveLearningContext, AnormalyContext
from strategy import DeepDebugger, TimeVis, tfDeepVisualInsight, DVIAL, tfDVIDenseAL, TimeVisDenseAL, TrustActiveLearningDVI, DeepVisualInsight, TrustProxyDVI
from singleVis.eval.evaluate import evaluate_isAlign, evaluate_isNearestNeighbour, evaluate_isAlign_single, evaluate_isNearestNeighbour_single

"""Interface align"""


def initialize_strategy(CONTENT_PATH, VIS_METHOD, SETTING, dense=False):
    """Create the visualization strategy for a method/setting combination.

    Args:
        CONTENT_PATH (str): directory containing ``config.json``; also passed
            to the strategy constructor.
        VIS_METHOD (str): key into ``config.json`` and name of the strategy,
            e.g. "DVI", "TimeVis", "DeepDebugger", "TrustVisActiveLearning",
            "TrustVisProxy".
        SETTING (str): "normal", "abnormal" or "active learning".
        dense (bool): only consulted for "active learning"; selects the dense
            variants (supported for "DVI" and "TimeVis" only).

    Raises:
        KeyError: if ``VIS_METHOD`` has no section in ``config.json``.
        NotImplementedError: if the setting, or the method under that setting,
            is not supported.

    Returns:
        The constructed strategy object.
    """
    with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
        conf = json.load(f)
    config = conf[VIS_METHOD]

    # TODO: support TimeVis; currently only DVI is supported.
    # TODO: remove unnecessary parts.
    if SETTING in ("normal", "abnormal"):
        strategy_classes = {
            "TrustVisActiveLearning": TrustActiveLearningDVI,
            "TrustVisProxy": TrustProxyDVI,
            "DVI": DeepVisualInsight,
            "TimeVis": TimeVis,
            "DeepDebugger": DeepDebugger,
        }
    elif SETTING == "active learning":
        if not dense:
            # The non-dense active learning setting ignores VIS_METHOD.
            return DVIAL(CONTENT_PATH, config)
        strategy_classes = {
            "DVI": tfDVIDenseAL,
            "TimeVis": TimeVisDenseAL,
        }
    else:
        raise NotImplementedError(f"unsupported setting: {SETTING!r}")

    strategy_cls = strategy_classes.get(VIS_METHOD)
    if strategy_cls is None:
        raise NotImplementedError(
            f"unsupported visualization method {VIS_METHOD!r} for setting {SETTING!r}"
        )
    return strategy_cls(CONTENT_PATH, config)


# TODO: remove unnecessary parts
def initialize_context(strategy, setting):
    """Wrap ``strategy`` in the context class that matches ``setting``.

    Args:
        strategy: a strategy object as returned by ``initialize_strategy``.
        setting (str): "normal", "active learning" or "abnormal".

    Raises:
        NotImplementedError: for any other ``setting``.

    Returns:
        The constructed context.
    """
    context_classes = {
        "normal": VisContext,
        "active learning": ActiveLearningContext,
        "abnormal": AnormalyContext,
    }
    context_cls = context_classes.get(setting)
    if context_cls is None:
        raise NotImplementedError(f"unsupported setting: {setting!r}")
    return context_cls(strategy)


def initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING, dense=False):
    """Initialize the backend (strategy wrapped in a context) for visualization.

    Args:
        CONTENT_PATH (str): the directory of the training process
        VIS_METHOD (str): visualization strategy "DVI", "TimeVis", "DeepDebugger",...
        SETTING (str): context "normal", "active learning", "abnormal"
        dense (bool): use the dense active learning strategies

    Raises:
        NotImplementedError: if the method/setting combination is unsupported

    Returns:
        backend: a context with a specific strategy
    """
    strategy = initialize_strategy(CONTENT_PATH, VIS_METHOD, SETTING, dense)
    context = initialize_context(strategy=strategy, setting=SETTING)
    return context


def get_train_test_data(context, EPOCH):
    """Return the train and test representations of ``EPOCH`` concatenated along axis 0 (train first)."""
    train_data = context.train_representation_data(EPOCH)
    test_data = context.test_representation_data(EPOCH)
    all_data = np.concatenate((train_data, test_data), axis=0)
    return all_data


def get_train_test_label(context, EPOCH):
    """Return the train and test labels of ``EPOCH`` concatenated along axis 0 (train first), cast to int."""
    train_labels = context.train_labels(EPOCH)
    test_labels = context.test_labels(EPOCH)
    labels = np.concatenate((train_labels, test_labels), axis=0).astype(int)
    return labels


def _load_or_project_embedding(context, epoch, data):
    """Return the 2-D embedding of ``data`` at ``epoch``, cached as ``embedding.npy``.

    If ``embedding.npy`` exists in the epoch's checkpoint directory it is
    loaded and ``data`` is not used; otherwise ``data`` is projected with the
    strategy's projector and the result is saved to that file.
    """
    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(epoch), "embedding.npy")
    if os.path.exists(embedding_path):
        return np.load(embedding_path)
    embedding_2d = context.strategy.projector.batch_project(epoch, data)
    np.save(embedding_path, embedding_2d)
    return embedding_2d


def update_epoch_projection(context, EPOCH, predicates, isContraVis):
    """Collect everything needed to render the projection of one epoch.

    Args:
        context: backend context (see ``initialize_backend``).
        EPOCH: epoch/iteration to visualize.
        predicates (dict): filters; the keys "label" and "type" are applied
            through ``context.filter_label`` / ``context.filter_type``, any
            other key selects all points.
        isContraVis: when equal to the string ``'false'``, the points whose
            prediction changes after inverse-projecting their 2-D embedding
            are reported as highlighted.

    Returns:
        A 14-tuple: ``(embedding_2d as list, grid, b_fig, label_name_dict,
        label_color_list, label_list, max_iter, training_data_index,
        testing_data_index, eval_new, prediction_list, selected_points,
        properties, highlightedPointIndices)``.
    """
    # TODO consider active learning setting
    train_data = context.train_representation_data(EPOCH)
    test_data = context.test_representation_data(EPOCH)
    all_data = np.concatenate((train_data, test_data), axis=0)
    print(len(all_data))

    # Only the training labels are used here (test labels are not appended).
    labels = context.train_labels(EPOCH)

    embedding_2d = _load_or_project_embedding(context, EPOCH, all_data)

    training_data_number = context.strategy.config["TRAINING"]["train_num"]
    testing_data_number = context.strategy.config["TRAINING"]["test_num"]
    total_number = training_data_number + testing_data_number
    training_data_index = list(range(training_data_number))
    testing_data_index = list(range(training_data_number, total_number))

    # Background image: read the cached image and scale if both exist,
    # otherwise ask the visualizer to produce them.
    checkpoint_dir = context.strategy.data_provider.checkpoint_path(EPOCH)
    bgimg_path = os.path.join(checkpoint_dir, "bgimg.png")
    scale_path = os.path.join(checkpoint_dir, "scale.npy")
    if os.path.exists(bgimg_path) and os.path.exists(scale_path):
        with open(bgimg_path, 'rb') as img_f:
            img_stream = img_f.read()
        b_fig = base64.b64encode(img_stream).decode()
        grid = np.load(scale_path)
    else:
        x_min, y_min, x_max, y_max, b_fig = context.strategy.vis.get_background(
            EPOCH, context.strategy.config["VISUALIZATION"]["RESOLUTION"])
        # formatting
        grid = [float(i) for i in (x_min, y_min, x_max, y_max)]
        b_fig = str(b_fig, encoding='utf-8')

    # TODO fix its structure
    eval_new = dict()
    file_name = context.strategy.config["VISUALIZATION"]["EVALUATION_NAME"]
    save_eval_dir = os.path.join(context.strategy.data_provider.model_path, file_name + ".json")
    if os.path.exists(save_eval_dir):
        evaluation = context.strategy.evaluator.get_eval(file_name=file_name)
        eval_new["train_acc"] = evaluation["train_acc"][str(EPOCH)]
        eval_new["test_acc"] = evaluation["test_acc"][str(EPOCH)]
    else:
        # No evaluation file: report zero accuracies.
        eval_new["train_acc"] = 0
        eval_new["test_acc"] = 0

    color = context.strategy.vis.get_standard_classes_color() * 255
    color = color.astype(int)

    CLASSES = np.array(context.strategy.config["CLASSES"])
    label_color_list = color[labels].tolist()
    label_list = CLASSES[labels].tolist()
    label_name_dict = dict(enumerate(CLASSES))

    # Predictions are not computed here; one placeholder "0" is returned per
    # training sample.
    prediction_list = ["0"] * len(train_data)

    EPOCH_START = context.strategy.config["EPOCH_START"]
    EPOCH_PERIOD = context.strategy.config["EPOCH_PERIOD"]
    EPOCH_END = context.strategy.config["EPOCH_END"]
    max_iter = (EPOCH_END - EPOCH_START) // EPOCH_PERIOD + 1

    # Start from every point and intersect with each predicate's selection.
    selected_points = np.arange(total_number)
    for key, value in predicates.items():
        if key == "label":
            tmp = np.array(context.filter_label(value))
        elif key == "type":
            tmp = np.array(context.filter_type(value, int(EPOCH)))
        else:
            tmp = np.arange(total_number)
        selected_points = np.intersect1d(selected_points, tmp)

    # Per-point property code: 0 for training points, 2 for testing points,
    # then 1 for training points that are not in context.get_epoch_index(EPOCH)
    # (presumably the unlabelled ones in active learning -- TODO confirm).
    properties = np.concatenate((np.zeros(training_data_number, dtype=np.int16),
                                 2 * np.ones(testing_data_number, dtype=np.int16)), axis=0)
    lb = context.get_epoch_index(EPOCH)
    ulb = np.setdiff1d(training_data_index, lb)
    properties[ulb] = 1

    highlightedPointIndices = []
    if isContraVis == 'false':
        # Highlight points whose predicted class differs between the original
        # representation and the inverse projection of its 2-D embedding.
        high_pred = context.strategy.data_provider.get_pred(EPOCH, all_data).argmax(1)
        inv_high_dim_data = context.strategy.projector.batch_inverse(EPOCH, embedding_2d)
        inv_high_pred = context.strategy.data_provider.get_pred(EPOCH, inv_high_dim_data).argmax(1)
        highlightedPointIndices = np.where(high_pred != inv_high_pred)[0]

    print("EMBEDDINGLEN", len(embedding_2d))
    return (embedding_2d.tolist(), grid, b_fig, label_name_dict, label_color_list, label_list,
            max_iter, training_data_index, testing_data_index, eval_new, prediction_list,
            selected_points, properties, highlightedPointIndices)


def getContraVisChangeIndices(context, iterationLeft, iterationRight, method):
    """Compare the embeddings of two iterations with the chosen evaluation.

    Args:
        context: backend context.
        iterationLeft: first iteration to compare.
        iterationRight: second iteration to compare.
        method (str): "align", "nearest neighbour" or "both" (the intersection
            of the two results). Any other value prints "wrong method".

    Returns:
        The result of the selected evaluation, a list of the common indices
        for "both", or an empty list for an unknown method.
    """
    all_data = get_train_test_data(context, iterationLeft)
    embedding_2d = _load_or_project_embedding(context, iterationLeft, all_data)

    last_all_data = get_train_test_data(context, iterationRight)
    last_embedding_2d = _load_or_project_embedding(context, iterationRight, last_all_data)

    predChangeIndices = []
    if method == "align":
        predChangeIndices = evaluate_isAlign(embedding_2d, last_embedding_2d)
    elif method == "nearest neighbour":
        predChangeIndices = evaluate_isNearestNeighbour(embedding_2d, last_embedding_2d)
    elif method == "both":
        predChangeIndices_align = evaluate_isAlign(embedding_2d, last_embedding_2d)
        predChangeIndices_nearest = evaluate_isNearestNeighbour(embedding_2d, last_embedding_2d)
        intersection = set(predChangeIndices_align).intersection(predChangeIndices_nearest)
        predChangeIndices = list(intersection)
    else:
        print("wrong method")
    return predChangeIndices


def getContraVisChangeIndicesSingle(context, iterationLeft, iterationRight, method, left_selected, right_selected):
    """Compare two iterations' embeddings with respect to selected points.

    Args:
        context: backend context.
        iterationLeft: first iteration to compare.
        iterationRight: second iteration to compare.
        method (str): "align" fills the first two results, "nearest neighbour"
            fills the last four; any other value leaves all results empty.
        left_selected: selection forwarded to the evaluation function.
        right_selected: selection forwarded to the evaluation function.

    Returns:
        A 6-tuple ``(predChangeIndicesLeft, predChangeIndicesRight,
        predChangeIndicesLeft_Left, predChangeIndicesLeft_Right,
        predChangeIndicesRight_Left, predChangeIndicesRight_Right)``; entries
        not produced by the chosen method are empty lists.
    """
    all_data = get_train_test_data(context, iterationLeft)
    embedding_2d = _load_or_project_embedding(context, iterationLeft, all_data)

    last_all_data = get_train_test_data(context, iterationRight)
    last_embedding_2d = _load_or_project_embedding(context, iterationRight, last_all_data)

    predChangeIndicesLeft = []
    predChangeIndicesRight = []
    predChangeIndicesLeft_Left = []
    predChangeIndicesLeft_Right = []
    predChangeIndicesRight_Left = []
    predChangeIndicesRight_Right = []

    if method == "align":
        predChangeIndicesLeft, predChangeIndicesRight = evaluate_isAlign_single(
            embedding_2d, last_embedding_2d, left_selected, right_selected)
    elif method == "nearest neighbour":
        (predChangeIndicesLeft_Left, predChangeIndicesLeft_Right,
         predChangeIndicesRight_Left, predChangeIndicesRight_Right) = evaluate_isNearestNeighbour_single(
            embedding_2d, last_embedding_2d, left_selected, right_selected)

    return (predChangeIndicesLeft, predChangeIndicesRight,
            predChangeIndicesLeft_Left, predChangeIndicesLeft_Right,
            predChangeIndicesRight_Left, predChangeIndicesRight_Right)


def getCriticalChangeIndices(context, curr_iteration, last_iteration):
    """Return the indices whose predicted class differs between two iterations.

    Args:
        context: backend context.
        curr_iteration: iteration whose predictions are compared.
        last_iteration: iteration compared against.

    Returns:
        np.ndarray of indices (into the concatenated train+test data) where
        the argmax of ``get_pred`` differs between the two iterations.
    """
    all_data = get_train_test_data(context, curr_iteration)
    # NOTE(review): the embedding itself is not used below; the call is kept
    # so the embedding.npy cache is still created as before.
    _load_or_project_embedding(context, curr_iteration, all_data)

    last_all_data = get_train_test_data(context, last_iteration)
    _load_or_project_embedding(context, last_iteration, last_all_data)

    high_pred = context.strategy.data_provider.get_pred(curr_iteration, all_data).argmax(1)
    last_high_pred = context.strategy.data_provider.get_pred(last_iteration, last_all_data).argmax(1)

    predChangeIndices = np.where(high_pred != last_high_pred)[0]
    return predChangeIndices


def getConfChangeIndices(context, curr_iteration, last_iteration, confChangeInput):
    """Return indices with an unchanged class but a large confidence change.

    Confidence is the softmax probability of the predicted class. A point is
    reported when both iterations predict the same class and the absolute
    difference of the two confidences exceeds ``confChangeInput``.

    Args:
        context: backend context.
        curr_iteration: iteration whose predictions are compared.
        last_iteration: iteration compared against.
        confChangeInput: threshold on the absolute confidence difference.

    Returns:
        np.ndarray of indices into the concatenated train+test data.
    """
    all_data = get_train_test_data(context, curr_iteration)
    # NOTE(review): the embedding itself is not used below; the call is kept
    # so the embedding.npy cache is still created as before.
    _load_or_project_embedding(context, curr_iteration, all_data)

    last_all_data = get_train_test_data(context, last_iteration)
    _load_or_project_embedding(context, last_iteration, last_all_data)

    high_pred = context.strategy.data_provider.get_pred(curr_iteration, all_data)
    last_high_pred = context.strategy.data_provider.get_pred(last_iteration, last_all_data)

    high_conf = softmax(high_pred, axis=1)
    last_high_conf = softmax(last_high_pred, axis=1)

    # get class type with highest prob
    high_pred_class = high_conf.argmax(axis=1)
    last_high_pred_class = last_high_conf.argmax(axis=1)

    same_pred_indices = np.where(high_pred_class == last_high_pred_class)[0]
    print("same")
    print(same_pred_indices)

    # Absolute difference between each iteration's confidence in its own
    # predicted class.
    conf_diff = np.abs(high_conf[np.arange(len(high_conf)), high_pred_class]
                       - last_high_conf[np.arange(len(last_high_conf)), last_high_pred_class])
    print("conf")
    print(conf_diff)

    significant_conf_change_indices = same_pred_indices[conf_diff[same_pred_indices] > confChangeInput]
    print("siginificant")
    print(significant_conf_change_indices)
    return significant_conf_change_indices


def add_line(path, data_row):
    """Append ``data_row`` plus the current local time as one CSV row.

    Args:
        path: CSV file to append to (created if it does not exist).
        data_row: sequence of values, e.g. [API_name, username]; the
            timestamp is written as an extra last column. The argument itself
            is not modified.
    """
    now_time = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime())
    # newline="" lets the csv module control line endings (avoids blank rows
    # on platforms that translate "\n").
    with open(path, "a+", newline="") as f:
        csv_write = csv.writer(f)
        csv_write.writerow([*data_row, now_time])