SalazarPevelll committed
Commit f291f4a
Parent(s): 8fcf809
be
This view is limited to 50 files because it contains too many changes.
- saved_models/codesearch_simp/__pycache__/context.cpython-310.pyc +0 -0
- saved_models/codesearch_simp/__pycache__/context.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/__pycache__/strategy.cpython-310.pyc +0 -0
- saved_models/codesearch_simp/__pycache__/strategy.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/context.py +603 -0
- saved_models/codesearch_simp/dataFeature.ipynb +0 -0
- saved_models/codesearch_simp/gen_label.py +32 -0
- saved_models/codesearch_simp/server/__init__.py +0 -0
- saved_models/codesearch_simp/server/__pycache__/utils.cpython-310.pyc +0 -0
- saved_models/codesearch_simp/server/__pycache__/utils.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/server/admin_API_result.csv +0 -0
- saved_models/codesearch_simp/server/server.py +620 -0
- saved_models/codesearch_simp/server/utils.py +475 -0
- saved_models/codesearch_simp/simplify.py +35 -0
- saved_models/codesearch_simp/singleVis/SingleVisualizationModel.py +188 -0
- saved_models/codesearch_simp/singleVis/__init__.py +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/SingleVisualizationModel.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/SingleVisualizationModel.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/__init__.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/__init__.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/active_sampling.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/backend.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/backend.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/custom_weighted_random_sampler.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/custom_weighted_random_sampler.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/data.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/data.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/edge_dataset.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/edge_dataset.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/intrinsic_dim.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/intrinsic_dim.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/jj1sk.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/jj51sk.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/jj551sk.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/jjsk.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/kcenter_greedy.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/kcenter_greedy.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/losses.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/losses.cpython-39.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/projector.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/sVis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/s_Vis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/segmenter.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skeVis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skeleVis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skele_Vis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skele_viser.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skeletonVis.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skeletonViser.cpython-37.pyc +0 -0
- saved_models/codesearch_simp/singleVis/__pycache__/skeletonVisualizer.cpython-37.pyc +0 -0
saved_models/codesearch_simp/__pycache__/context.cpython-310.pyc
ADDED
Binary file (17.9 kB)
saved_models/codesearch_simp/__pycache__/context.cpython-37.pyc
ADDED
Binary file (18 kB)
saved_models/codesearch_simp/__pycache__/strategy.cpython-310.pyc
ADDED
Binary file (38.4 kB)
saved_models/codesearch_simp/__pycache__/strategy.cpython-37.pyc
ADDED
Binary file (44.5 kB)
saved_models/codesearch_simp/context.py
ADDED
@@ -0,0 +1,603 @@
'''This class serves as an intermediate layer between the tensorboard frontend and the DeepDebugger backend'''
from abc import ABC, abstractmethod
import os
import sys
import json
import time
import torch
import numpy as np
import pickle
import shutil

import torch.nn

from scipy.special import softmax

from strategy import StrategyAbstractClass

from singleVis.utils import *
from singleVis.trajectory_manager import Recommender
from singleVis.active_sampling import random_sampling, uncerainty_sampling  # spelling follows singleVis.active_sampling

# active_learning_path = "../../ActiveLearning"
# sys.path.append(active_learning_path)

'''the context for different dataset settings'''
class Context(ABC):
    """
    The Context defines the interface of interest to users of our visualization method.
    """
    def __init__(self, strategy: StrategyAbstractClass) -> None:
        """
        Usually, the Context accepts a visualization strategy through the constructor, but
        also provides a setter to change it at runtime.
        """
        self._strategy = strategy

    @property
    def strategy(self) -> StrategyAbstractClass:
        return self._strategy

    @strategy.setter
    def strategy(self, strategy: StrategyAbstractClass) -> None:
        self._strategy = strategy


class VisContext(Context):
    '''Normal setting'''
    #############################################################################################################
    #                                                                                                           #
    #                                                 Adapter                                                   #
    #                                                                                                           #
    #############################################################################################################

    def train_representation_data(self, EPOCH):
        return self.strategy.data_provider.train_representation(EPOCH)

    def test_representation_data(self, EPOCH):
        return self.strategy.data_provider.test_representation(EPOCH)

    def train_labels(self, EPOCH):
        return self.strategy.data_provider.train_labels(EPOCH)

    def test_labels(self, EPOCH):
        return self.strategy.data_provider.test_labels(EPOCH)

    def suggest_abnormal(self, strategy, acc_idxs, rej_idxs, budget):
        ntd = self._init_detection()
        if strategy == "TBSampling":
            suggest_idxs, scores = ntd.sample_batch_init(acc_idxs, rej_idxs, budget)
        elif strategy == "Feedback":
            suggest_idxs, scores = ntd.sample_batch(acc_idxs, rej_idxs, budget)
        else:
            raise NotImplementedError
        suggest_labels = self.clean_labels[suggest_idxs]
        return suggest_idxs, scores, suggest_labels

    #############################################################################################################
    #                                                                                                           #
    #                                                data Panel                                                 #
    #                                                                                                           #
    #############################################################################################################

    def batch_inv_preserve(self, epoch, data):
        """
        get inverse confidence for a single point
        :param epoch: int
        :param data: numpy.ndarray
        :return l: boolean, whether the reconstructed data have the same prediction
        :return conf_diff: float, (0, 1), confidence difference
        """
        embedding = self.strategy.projector.batch_project(epoch, data)
        recon = self.strategy.projector.batch_inverse(epoch, embedding)

        ori_pred = self.strategy.data_provider.get_pred(epoch, data)
        new_pred = self.strategy.data_provider.get_pred(epoch, recon)
        ori_pred = softmax(ori_pred, axis=1)
        new_pred = softmax(new_pred, axis=1)

        old_label = ori_pred.argmax(-1)
        new_label = new_pred.argmax(-1)
        l = old_label == new_label

        old_conf = [ori_pred[i, old_label[i]] for i in range(len(old_label))]
        new_conf = [new_pred[i, old_label[i]] for i in range(len(old_label))]
        old_conf = np.array(old_conf)
        new_conf = np.array(new_conf)

        conf_diff = old_conf - new_conf
        return l, conf_diff

    #############################################################################################################
    #                                                                                                           #
    #                                               Search Panel                                                #
    #                                                                                                           #
    #############################################################################################################

    # TODO: fix bugs according to the new api
    # customized features
    def filter_label(self, label, epoch_id):
        try:
            index = self.strategy.data_provider.classes.index(label)
        except:
            index = -1
        train_labels = self.strategy.data_provider.train_labels(epoch_id)
        test_labels = self.strategy.data_provider.test_labels(epoch_id)
        labels = np.concatenate((train_labels, test_labels), 0)
        idxs = np.argwhere(labels == index)
        idxs = np.squeeze(idxs)
        return idxs

    def filter_type(self, type, epoch_id):
        if type == "train":
            res = self.get_epoch_index(epoch_id)
        elif type == "test":
            train_num = self.strategy.data_provider.train_num
            test_num = self.strategy.data_provider.test_num
            res = list(range(train_num, train_num + test_num, 1))
        elif type == "unlabel":
            labeled = np.array(self.get_epoch_index(epoch_id))
            train_num = self.strategy.data_provider.train_num
            all_data = np.arange(train_num)
            unlabeled = np.setdiff1d(all_data, labeled)
            res = unlabeled.tolist()
        else:
            # all data
            train_num = self.strategy.data_provider.train_num
            test_num = self.strategy.data_provider.test_num
            res = list(range(0, train_num + test_num, 1))
        return res

    def filter_conf(self, conf_min, conf_max, epoch_id):
        train_data = self.strategy.data_provider.train_representation(epoch_id)
        test_data = self.strategy.data_provider.test_representation(epoch_id)
        data = np.concatenate((train_data, test_data), axis=0)
        pred = self.strategy.data_provider.get_pred(epoch_id, data)
        scores = np.amax(softmax(pred, axis=1), axis=1)
        res = np.argwhere(np.logical_and(scores <= conf_max, scores >= conf_min)).squeeze().tolist()
        return res

    #############################################################################################################
    #                                                                                                           #
    #                                             Helper Functions                                              #
    #                                                                                                           #
    #############################################################################################################

    def save_acc_and_rej(self, acc_idxs, rej_idxs, file_name):
        d = {
            "acc_idxs": acc_idxs,
            "rej_idxs": rej_idxs
        }
        path = os.path.join(self.strategy.data_provider.content_path, "{}_acc_rej.json".format(file_name))
        with open(path, "w") as f:
            json.dump(d, f)
        print("Successfully saved the acc and rej idxs selected by the user...")

    def get_epoch_index(self, epoch_id):
        """get the training data index for an epoch"""
        index_file = os.path.join(self.strategy.data_provider.model_path, "Epoch_{:d}".format(epoch_id), "index.json")
        index = load_labelled_data_index(index_file)
        return index

    def get_max_iter(self):
        EPOCH_START = self.strategy.config["EPOCH_START"]
        EPOCH_END = self.strategy.config["EPOCH_END"]
        EPOCH_PERIOD = self.strategy.config["EPOCH_PERIOD"]
        return int((EPOCH_END - EPOCH_START) / EPOCH_PERIOD) + 1

    def reset(self):
        return


class ActiveLearningContext(VisContext):
    '''Active learning dataset'''
    def __init__(self, strategy) -> None:
        super().__init__(strategy)

    '''Active learning setting'''
    #############################################################################################################
    #                                                                                                           #
    #                                                 Adapter                                                   #
    #                                                                                                           #
    #############################################################################################################

    def train_representation_data(self, iteration):
        return self.strategy.data_provider.train_representation_all(iteration)

    def train_labels(self, iteration):
        labels = self.strategy.data_provider.train_labels_all()
        return labels

    def save_acc_and_rej(self, iteration, acc_idxs, rej_idxs, file_name):
        d = {
            "acc_idxs": acc_idxs,
            "rej_idxs": rej_idxs
        }
        path = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), "{}_acc_rej.json".format(file_name))
        with open(path, "w") as f:
            json.dump(d, f)
        print("Successfully saved the acc and rej idxs selected by the user at Iteration {}...".format(iteration))

    def reset(self, iteration):
        # delete checkpoints in [iteration, ...)
        max_i = self.get_max_iter()
        for i in range(iteration, max_i + 1, 1):
            path = self.strategy.data_provider.checkpoint_path(i)  # use the loop variable; checkpoint_path(iteration) removed the same directory every pass
            shutil.rmtree(path)
        iter_structure_path = os.path.join(self.strategy.data_provider.content_path, "iteration_structure.json")
        with open(iter_structure_path, "r") as f:
            i_s = json.load(f)
        new_is = list()
        for item in i_s:
            value = item["value"]
            if value < iteration:
                new_is.append(item)
        with open(iter_structure_path, "w") as f:
            json.dump(new_is, f)
        print("Successfully removed cached data!")

    def get_epoch_index(self, iteration):
        """get the training data index for an iteration"""
        index_file = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), "index.json")
        index = load_labelled_data_index(index_file)
        return index

    def al_query(self, iteration, budget, strategy, acc_idxs, rej_idxs):
        """get the indices of the new selection from different strategies"""
        CONTENT_PATH = self.strategy.data_provider.content_path
        NUM_QUERY = budget
        NET = self.strategy.config["TRAINING"]["NET"]
        DATA_NAME = self.strategy.config["DATASET"]
        TOTAL_EPOCH = self.strategy.config["TRAINING"]["total_epoch"]
        sys.path.append(CONTENT_PATH)

        # record output information
        # now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
        # sys.stdout = open(os.path.join(CONTENT_PATH, now+".txt"), "w")

        # loading the neural network
        import Model.model as subject_model
        task_model = eval("subject_model.{}()".format(NET))
        # start experiment
        n_pool = self.strategy.config["TRAINING"]["train_num"]  # 50000
        n_test = self.strategy.config["TRAINING"]['test_num']   # 10000

        resume_path = self.strategy.data_provider.checkpoint_path(iteration)

        idxs_lb = np.array(json.load(open(os.path.join(resume_path, "index.json"), "r")))

        state_dict = torch.load(os.path.join(resume_path, "subject_model.pth"), map_location=torch.device('cpu'))
        task_model.load_state_dict(state_dict)
        NUM_INIT_LB = len(idxs_lb)

        print('resume from iteration {}'.format(iteration))
        print('number of labeled pool: {}'.format(NUM_INIT_LB))
        print('number of unlabeled pool: {}'.format(n_pool - NUM_INIT_LB))
        print('number of testing pool: {}'.format(n_test))

        if strategy == "Random":
            print(DATA_NAME)
            print(strategy)
            print('================Round {:d}==============='.format(iteration + 1))
            # query new samples
            t0 = time.time()
            # TODO implement active learning
            new_indices, scores = random_sampling(n_pool, idxs_lb, acc_idxs, rej_idxs, NUM_QUERY)
            t1 = time.time()
            print("Query time is {:.2f}".format(t1 - t0))

        elif strategy == "Uncertainty":
            print(DATA_NAME)
            print(strategy)
            print('================Round {:d}==============='.format(iteration + 1))
            samples = self.strategy.data_provider.train_representation(iteration)
            pred = self.strategy.data_provider.get_pred(iteration, samples)
            confidence = np.amax(softmax(pred, axis=1), axis=1)
            uncertainty = 1 - confidence
            # query new samples
            t0 = time.time()
            new_indices, scores = uncerainty_sampling(n_pool, idxs_lb, acc_idxs, rej_idxs, NUM_QUERY, uncertainty=uncertainty)
            t1 = time.time()
            print("Query time is {:.2f}".format(t1 - t0))

        elif strategy == "TBSampling":
            period = int(2 / 3 * TOTAL_EPOCH)
            print(DATA_NAME)
            print("TBSampling")
            print('================Round {:d}==============='.format(iteration + 1))
            t0 = time.time()
            new_indices, scores = self._suggest_abnormal(strategy, iteration, idxs_lb, acc_idxs, rej_idxs, budget, period)
            t1 = time.time()
            print("Query time is {:.2f}".format(t1 - t0))

        elif strategy == "Feedback":
            period = int(2 / 3 * TOTAL_EPOCH)
            print(DATA_NAME)
            print("Feedback")
            print('================Round {:d}==============='.format(iteration + 1))
            t0 = time.time()
            new_indices, scores = self._suggest_abnormal(strategy, iteration, idxs_lb, acc_idxs, rej_idxs, budget, period)
            t1 = time.time()
            print("Query time is {:.2f}".format(t1 - t0))
        else:
            raise NotImplementedError

        true_labels = self.train_labels(iteration)

        return new_indices, true_labels[new_indices], scores

    def al_train(self, iteration, indices):
        # TODO fix
        raise NotImplementedError
        # # customize ....
        # CONTENT_PATH = self.strategy.data_provider.content_path
        # # record output information
        # now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
        # sys.stdout = open(os.path.join(CONTENT_PATH, now+".txt"), "w")

        # # for reproduce purpose
        # print("New indices:\t{}".format(len(indices)))
        # self.save_human_selection(iteration, indices)
        # lb_idx = self.get_epoch_index(iteration)
        # train_idx = np.hstack((lb_idx, indices))
        # print("Training indices:\t{}".format(len(train_idx)))
        # print("Valid indices:\t{}".format(len(set(train_idx))))

        # TOTAL_EPOCH = self.strategy.config["TRAINING"]["total_epoch"]
        # NET = self.strategy.config["TRAINING"]["NET"]
        # DEVICE = self.strategy.data_provider.DEVICE
        # NEW_ITERATION = self.get_max_iter() + 1
        # GPU = self.strategy.config["GPU"]
        # DATA_NAME = self.strategy.config["DATASET"]
        # sys.path.append(CONTENT_PATH)

        # # loading neural network
        # from Model.model import resnet18
        # task_model = resnet18()
        # resume_path = self.strategy.data_provider.checkpoint_path(iteration)
        # state_dict = torch.load(os.path.join(resume_path, "subject_model.pth"), map_location=torch.device("cpu"))
        # task_model.load_state_dict(state_dict)

        # self.save_iteration_index(NEW_ITERATION, train_idx)
        # task_model_type = "pytorch"
        # # start experiment
        # n_pool = self.strategy.config["TRAINING"]["train_num"] # 50000
        # save_path = self.strategy.data_provider.checkpoint_path(NEW_ITERATION)
        # os.makedirs(save_path, exist_ok=True)

        # from query_strategies.random import RandomSampling
        # q_strategy = RandomSampling(task_model, task_model_type, n_pool, lb_idx, 10, DATA_NAME, NET, gpu=GPU, **self.hyperparameters["TRAINING"])
        # # print information
        # print('================Round {:d}==============='.format(NEW_ITERATION))
        # # update
        # q_strategy.update_lb_idxs(train_idx)
        # resnet_model = resnet18()
        # train_dataset = torchvision.datasets.CIFAR10(root="..//data//CIFAR10", download=True, train=True, transform=self.hyperparameters["TRAINING"]['transform_tr'])
        # test_dataset = torchvision.datasets.CIFAR10(root="..//data//CIFAR10", download=True, train=False, transform=self.hyperparameters["TRAINING"]['transform_te'])
        # t1 = time.time()
        # q_strategy.train(total_epoch=TOTAL_EPOCH, task_model=resnet_model, complete_dataset=train_dataset,save_path=None)
        # t2 = time.time()
        # print("Training time is {:.2f}".format(t2-t1))
        # self.save_subject_model(NEW_ITERATION, q_strategy.task_model.state_dict())

        # # compute accuracy at each round
        # accu = q_strategy.test_accu(test_dataset)
        # print('Accuracy {:.3f}'.format(100*accu))

    def get_max_iter(self):
        path = os.path.join(self.strategy.data_provider.content_path, "Model")
        dir_list = os.listdir(path)
        iteration_name = self.strategy.data_provider.iteration_name
        max_iter = -1
        for dir in dir_list:
            if "{}_".format(iteration_name) in dir:
                i = int(dir.replace("{}_".format(iteration_name), ""))
                max_iter = max(max_iter, i)
        return max_iter

    def save_human_selection(self, iteration, indices):
        """
        save the selected index message from the DVI frontend
        :param iteration:
        :param indices: list, selected indices
        :return:
        """
        save_location = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), "human_select.json")
        with open(save_location, "w") as f:
            json.dump(indices, f)

    def save_iteration_index(self, iteration, idxs):
        new_iteration_dir = self.strategy.data_provider.checkpoint_path(iteration)
        os.makedirs(new_iteration_dir, exist_ok=True)
        save_location = os.path.join(new_iteration_dir, "index.json")
        with open(save_location, "w") as f:
            json.dump(idxs.tolist(), f)

    def save_subject_model(self, iteration, state_dict):
        new_iteration_dir = self.strategy.data_provider.checkpoint_path(iteration)
        model_path = os.path.join(new_iteration_dir, "subject_model.pth")
        torch.save(state_dict, model_path)

    def vis_train(self, iteration, resume_iter):
        self.strategy.visualize_embedding(iteration, resume_iter)

    #############################################################################################################
    #                                                                                                           #
    #                                             Sample Selection                                              #
    #                                                                                                           #
    #############################################################################################################
    def _save(self, iteration, ftm):
        with open(os.path.join(self.strategy.data_provider.checkpoint_path(iteration), '{}_sample_recommender.pkl'.format(self.strategy.VIS_METHOD)), 'wb') as f:
            pickle.dump(ftm, f, pickle.HIGHEST_PROTOCOL)

    def _init_detection(self, iteration, lb_idxs, period=80):
        # must be in the dense setting
        assert "Dense" in self.strategy.VIS_METHOD

        # prepare the high dimensional trajectory
        embedding_path = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), 'trajectory_embeddings.npy')
        if os.path.exists(embedding_path):
            trajectories = np.load(embedding_path)
            print("Load trajectories from cache!")
        else:
            # extract samples
            TOTAL_EPOCH = self.strategy.config["TRAINING"]["total_epoch"]
            EPOCH_START = self.strategy.config["TRAINING"]["epoch_start"]
            EPOCH_END = self.strategy.config["TRAINING"]["epoch_end"]
            EPOCH_PERIOD = self.strategy.config["TRAINING"]["epoch_period"]
            train_num = len(self.train_labels(None))
            # change epoch_NUM
            embeddings_2d = np.zeros((TOTAL_EPOCH, train_num, 2))
            for i in range(EPOCH_START, EPOCH_END + 1, EPOCH_PERIOD):
                id = (i - EPOCH_START) // EPOCH_PERIOD
                embeddings_2d[id] = self.strategy.projector.batch_project(iteration, i, self.strategy.data_provider.train_representation_all(iteration, i))
            trajectories = np.transpose(embeddings_2d, [1, 0, 2])
            np.save(embedding_path, trajectories)
        # prepare uncertainty
        uncertainty_path = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), 'uncertainties.npy')
        if os.path.exists(uncertainty_path):
            uncertainty = np.load(uncertainty_path)
        else:
            TOTAL_EPOCH = self.strategy.config["TRAINING"]["total_epoch"]
            EPOCH_START = self.strategy.config["TRAINING"]["epoch_start"]
            EPOCH_END = self.strategy.config["TRAINING"]["epoch_end"]
            EPOCH_PERIOD = self.strategy.config["TRAINING"]["epoch_period"]
            train_num = len(self.train_labels(None))

            samples = self.strategy.data_provider.train_representation_all(iteration, EPOCH_END)
            pred = self.strategy.data_provider.get_pred(iteration, EPOCH_END, samples)
            uncertainty = 1 - np.amax(softmax(pred, axis=1), axis=1)
            np.save(uncertainty_path, uncertainty)
        ulb_idxs = self.strategy.data_provider.get_unlabeled_idx(len(uncertainty), lb_idxs)
        # prepare the sampling manager
        ntd_path = os.path.join(self.strategy.data_provider.checkpoint_path(iteration), '{}_sample_recommender.pkl'.format(self.strategy.VIS_METHOD))
        if os.path.exists(ntd_path):
            with open(ntd_path, 'rb') as f:
                ntd = pickle.load(f)
        else:
            ntd = Recommender(uncertainty[ulb_idxs], trajectories[ulb_idxs], 30, period=period)
            print("Detecting abnormal....")
            ntd.clustered()
            print("Finish detection!")
            self._save(iteration, ntd)
        return ntd, ulb_idxs

    def _suggest_abnormal(self, strategy, iteration, lb_idxs, acc_idxs, rej_idxs, budget, period):
        ntd, ulb_idxs = self._init_detection(iteration, lb_idxs, period)
        map_ulb = ulb_idxs.tolist()
        map_acc_idxs = np.array([map_ulb.index(i) for i in acc_idxs]).astype(np.int32)
        map_rej_idxs = np.array([map_ulb.index(i) for i in rej_idxs]).astype(np.int32)
        if strategy == "TBSampling":
            suggest_idxs, scores = ntd.sample_batch_init(map_acc_idxs, map_rej_idxs, budget)
        elif strategy == "Feedback":
            suggest_idxs, scores = ntd.sample_batch(map_acc_idxs, map_rej_idxs, budget)
        else:
            raise NotImplementedError
        return ulb_idxs[suggest_idxs], scores

    def _suggest_normal(self, strategy, iteration, lb_idxs, acc_idxs, rej_idxs, budget, period):
        ntd, ulb_idxs = self._init_detection(iteration, lb_idxs, period)
        map_ulb = ulb_idxs.tolist()
        map_acc_idxs = np.array([map_ulb.index(i) for i in acc_idxs]).astype(np.int32)
        map_rej_idxs = np.array([map_ulb.index(i) for i in rej_idxs]).astype(np.int32)
        if strategy == "TBSampling":
            suggest_idxs, _ = ntd.sample_batch_normal_init(map_acc_idxs, map_rej_idxs, budget)
        elif strategy == "Feedback":
            suggest_idxs, _ = ntd.sample_batch_normal(map_acc_idxs, map_rej_idxs, budget)
        else:
            raise NotImplementedError
        return ulb_idxs[suggest_idxs]


class AnormalyContext(VisContext):

    def __init__(self, strategy) -> None:
        super().__init__(strategy)
        EPOCH_START = self.strategy.config["EPOCH_START"]
        EPOCH_END = self.strategy.config["EPOCH_END"]
        EPOCH_PERIOD = self.strategy.config["EPOCH_PERIOD"]
        self.period = int(2 / 3 * ((EPOCH_END - EPOCH_START) / EPOCH_PERIOD + 1))
        file_path = os.path.join(self.strategy.data_provider.content_path, 'clean_label.json')
        with open(file_path, "r") as f:
            self.clean_labels = np.array(json.load(f))

    def reset(self):
        return

    #############################################################################################################
    #                                                                                                           #
    #                                            Anomaly Detection                                              #
    #                                                                                                           #
    #############################################################################################################

    def _save(self, ntd):
        with open(os.path.join(self.strategy.data_provider.content_path, '{}_sample_recommender.pkl'.format(self.strategy.VIS_METHOD)), 'wb') as f:
            pickle.dump(ntd, f, pickle.HIGHEST_PROTOCOL)

    def _init_detection(self):
        # prepare trajectories
        embedding_path = os.path.join(self.strategy.data_provider.content_path, 'trajectory_embeddings.npy')
        if os.path.exists(embedding_path):
            trajectories = np.load(embedding_path)
        else:
            # extract samples
            train_num = self.strategy.data_provider.train_num
            # change epoch_NUM
            epoch_num = (self.strategy.data_provider.e - self.strategy.data_provider.s) // self.strategy.data_provider.p + 1
            embeddings_2d = np.zeros((epoch_num, train_num, 2))
            for i in range(self.strategy.data_provider.s, self.strategy.data_provider.e + 1, self.strategy.data_provider.p):
                id = (i - self.strategy.data_provider.s) // self.strategy.data_provider.p
                embeddings_2d[id] = self.strategy.projector.batch_project(i, self.strategy.data_provider.train_representation(i))
            trajectories = np.transpose(embeddings_2d, [1, 0, 2])
            np.save(embedding_path, trajectories)
        # prepare uncertainty scores
        uncertainty_path = os.path.join(self.strategy.data_provider.content_path, 'uncertainties.npy')
        if os.path.exists(uncertainty_path):
            uncertainty = np.load(uncertainty_path)
        else:
            epoch_num = (self.strategy.data_provider.e - self.strategy.data_provider.s) // self.strategy.data_provider.p + 1
            samples = self.strategy.data_provider.train_representation(epoch_num)
            pred = self.strategy.data_provider.get_pred(epoch_num, samples)
            uncertainty = 1 - np.amax(softmax(pred, axis=1), axis=1)
            np.save(uncertainty_path, uncertainty)

        # prepare the sampling manager
        ntd_path = os.path.join(self.strategy.data_provider.content_path, '{}_sample_recommender.pkl'.format(self.strategy.VIS_METHOD))
        if os.path.exists(ntd_path):
            with open(ntd_path, 'rb') as f:
                ntd = pickle.load(f)
        else:
            ntd = Recommender(uncertainty, trajectories, 30, self.period)
            print("Detecting abnormal....")
            ntd.clustered()
            print("Finish detection!")
            self._save(ntd)
        return ntd

    def suggest_abnormal(self, strategy, acc_idxs, rej_idxs, budget):
        ntd = self._init_detection()
        if strategy == "TBSampling":
            suggest_idxs, scores = ntd.sample_batch_init(acc_idxs, rej_idxs, budget)
        elif strategy == "Feedback":
            suggest_idxs, scores = ntd.sample_batch(acc_idxs, rej_idxs, budget)
        else:
            raise NotImplementedError
        suggest_labels = self.clean_labels[suggest_idxs]
        return suggest_idxs, scores, suggest_labels

    def suggest_normal(self, strategy, acc_idxs, rej_idxs, budget):
        ntd = self._init_detection()
        if strategy == "TBSampling":
            suggest_idxs, _ = ntd.sample_batch_normal_init(acc_idxs, rej_idxs, budget)
        elif strategy == "Feedback":
            suggest_idxs, _ = ntd.sample_batch_normal(acc_idxs, rej_idxs, budget)
        else:
            raise NotImplementedError
        suggest_labels = self.clean_labels[suggest_idxs]
        return suggest_idxs, suggest_labels
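
A note for readers of context.py above: it is a textbook strategy pattern, where the Context only delegates to whatever StrategyAbstractClass implementation it holds. Below is a minimal, self-contained sketch of that wiring; the two concrete strategy classes and their print behavior are invented for illustration, and only the Context skeleton mirrors the file above:

from abc import ABC

class StrategyAbstractClass(ABC):
    """Stand-in for the interface in strategy.py (hypothetical minimal version)."""
    def visualize_embedding(self, iteration, resume_iter):
        raise NotImplementedError

class TimeVisStrategy(StrategyAbstractClass):
    def visualize_embedding(self, iteration, resume_iter):
        print(f"TimeVis: project iteration {iteration} (resume from {resume_iter})")

class DeepDebuggerStrategy(StrategyAbstractClass):
    def visualize_embedding(self, iteration, resume_iter):
        print(f"DeepDebugger: project iteration {iteration} (resume from {resume_iter})")

class Context:
    """Mirrors the Context above: holds a strategy, swappable at runtime."""
    def __init__(self, strategy):
        self._strategy = strategy

    @property
    def strategy(self):
        return self._strategy

    @strategy.setter
    def strategy(self, strategy):
        self._strategy = strategy

ctx = Context(TimeVisStrategy())
ctx.strategy.visualize_embedding(1, 0)
ctx.strategy = DeepDebuggerStrategy()  # swap the backend without rebuilding the context
ctx.strategy.visualize_embedding(2, 1)

The practical payoff in this repo appears to be that initialize_backend can hand the frontend different visualization backends behind one interface.
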
saved_models/codesearch_simp/dataFeature.ipynb
ADDED
The diff for this file is too large to render.
saved_models/codesearch_simp/gen_label.py
ADDED
@@ -0,0 +1,32 @@
import json
import torch
import os

# file_path = '/home/yiming/ContrastDebugger/EXP/codesearch_query/Model/Epoch_1/index.json'

# # Open and read the JSON file
# with open(file_path, 'r') as file:
#     json_data = json.load(file)

# testset_label = None
# for i in range(len(json_data)):
#     if testset_label != None:
#         testset_label = torch.cat((testset_label, torch.tensor([0])), 0)
#     else:
#         testset_label = torch.tensor([0])

# torch.save(testset_label, "/home/yiming/ContrastDebugger/EXP/codesearch_query/Training_data/training_dataset_label.pth")

input_file = "/home/yiming/ContrastDebugger/EXP/codesearch/Model/label_list.json"
output_file = "/home/yiming/ContrastDebugger/EXP/codesearch/Model/new_label_list.json"  # replace with the output file path

# Read the input file
with open(input_file, "r") as f:
    data = json.load(f)

# Truncate each item to its first 30 characters
processed_data = [item[:30] for item in data]

# Save to a new JSON file
with open(output_file, "w") as f:
    json.dump(processed_data, f)
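
Concretely, the active portion of gen_label.py reduces to one list comprehension. A self-contained illustration with invented data (the sample strings are hypothetical, not taken from label_list.json):

import json

# hypothetical label list, mirroring the shape of Model/label_list.json
data = ["def binary_search(arr, lo, hi, target): ...", "short label"]

# same truncation rule as gen_label.py: keep the first 30 characters
processed_data = [item[:30] for item in data]
print(json.dumps(processed_data))
# ["def binary_search(arr, lo, hi,", "short label"]
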
saved_models/codesearch_simp/server/__init__.py
ADDED
File without changes
saved_models/codesearch_simp/server/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (9.66 kB)
saved_models/codesearch_simp/server/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (9.53 kB)
saved_models/codesearch_simp/server/admin_API_result.csv
ADDED
The diff for this file is too large to render.
saved_models/codesearch_simp/server/server.py
ADDED
@@ -0,0 +1,620 @@
1 |
+
from flask import request, Flask, jsonify, make_response
|
2 |
+
from flask_cors import CORS, cross_origin
|
3 |
+
from PIL import Image, ImageDraw, ImageFont
|
4 |
+
import base64
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
import json
|
8 |
+
import pickle
|
9 |
+
import io
|
10 |
+
import numpy as np
|
11 |
+
import gc
|
12 |
+
import shutil
|
13 |
+
from utils import update_epoch_projection, initialize_backend, add_line,getCriticalChangeIndices, getConfChangeIndices, getContraVisChangeIndices, getContraVisChangeIndicesSingle
|
14 |
+
import time
|
15 |
+
|
16 |
+
# flask for API server
|
17 |
+
app = Flask(__name__)
|
18 |
+
cors = CORS(app, supports_credentials=True)
|
19 |
+
app.config['CORS_HEADERS'] = 'Content-Type'
|
20 |
+
|
21 |
+
API_result_path = "./admin_API_result.csv"
|
22 |
+
|
23 |
+
@app.route('/updateProjection', methods=["POST", "GET"])
|
24 |
+
@cross_origin()
|
25 |
+
def update_projection():
|
26 |
+
res = request.get_json()
|
27 |
+
start_time = time.time()
|
28 |
+
|
29 |
+
CONTENT_PATH = os.path.normpath(res['path'])
|
30 |
+
VIS_METHOD = res['vis_method']
|
31 |
+
SETTING = res["setting"]
|
32 |
+
iteration = int(res['iteration'])
|
33 |
+
predicates = res["predicates"]
|
34 |
+
username = res['username']
|
35 |
+
isContraVis = res['isContraVis']
|
36 |
+
|
37 |
+
# sys.path.append(CONTENT_PATH)
|
38 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
39 |
+
# use the true one
|
40 |
+
# EPOCH = (iteration-1)*context.strategy.data_provider.p + context.strategy.data_provider.s
|
41 |
+
EPOCH = int(iteration)
|
42 |
+
embedding_2d, grid, decision_view, label_name_dict, label_color_list, label_list, max_iter, training_data_index, \
|
43 |
+
testing_data_index, eval_new, prediction_list, selected_points, properties, highlightedPointIndices = update_epoch_projection(context, EPOCH, predicates, isContraVis)
|
44 |
+
|
45 |
+
if (len(highlightedPointIndices) != 0):
|
46 |
+
highlightedPointIndices = highlightedPointIndices.tolist()
|
47 |
+
end_time = time.time()
|
48 |
+
elapsed_time = end_time - start_time
|
49 |
+
print("updateprojection", elapsed_time)
|
50 |
+
# sys.path.remove(CONTENT_PATH)
|
51 |
+
# add_line(API_result_path,['TT',username])
|
52 |
+
return make_response(jsonify({'result': embedding_2d,
|
53 |
+
'grid_index': grid.tolist(),
|
54 |
+
'grid_color': 'data:image/png;base64,' + decision_view,
|
55 |
+
'label_name_dict':label_name_dict,
|
56 |
+
'label_color_list': label_color_list,
|
57 |
+
'label_list': label_list,
|
58 |
+
'maximum_iteration': max_iter,
|
59 |
+
'training_data': training_data_index,
|
60 |
+
'testing_data': testing_data_index,
|
61 |
+
'evaluation': eval_new,
|
62 |
+
'prediction_list': prediction_list,
|
63 |
+
"selectedPoints":selected_points.tolist(),
|
64 |
+
"properties":properties.tolist(),
|
65 |
+
"highlightedPointIndices": highlightedPointIndices
|
66 |
+
|
67 |
+
}), 200)
|
68 |
+
|
69 |
+
|
70 |
+
@app.route('/highlightCriticalChange', methods=["POST", "GET"])
|
71 |
+
@cross_origin()
|
72 |
+
def highlight_critical_change():
|
73 |
+
res = request.get_json()
|
74 |
+
CONTENT_PATH = os.path.normpath(res['path'])
|
75 |
+
VIS_METHOD = res['vis_method']
|
76 |
+
SETTING = res["setting"]
|
77 |
+
curr_iteration = int(res['iteration'])
|
78 |
+
last_iteration = int(res['last_iteration'])
|
79 |
+
username = res['username']
|
80 |
+
|
81 |
+
# sys.path.append(CONTENT_PATH)
|
82 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
83 |
+
|
84 |
+
predChangeIndices = getCriticalChangeIndices(context, curr_iteration, last_iteration)
|
85 |
+
|
86 |
+
# sys.path.remove(CONTENT_PATH)
|
87 |
+
# add_line(API_result_path,['TT',username])
|
88 |
+
return make_response(jsonify({
|
89 |
+
"predChangeIndices": predChangeIndices.tolist()
|
90 |
+
}), 200)
|
91 |
+
|
92 |
+
|
93 |
+
@app.route('/contraVisHighlight', methods=["POST", "GET"])
|
94 |
+
@cross_origin()
|
95 |
+
def contravis_highlight():
|
96 |
+
res = request.get_json()
|
97 |
+
CONTENT_PATH = os.path.normpath(res['path'])
|
98 |
+
VIS_METHOD = res['vis_method']
|
99 |
+
SETTING = res["setting"]
|
100 |
+
curr_iteration = int(res['iterationLeft'])
|
101 |
+
last_iteration = int(res['iterationRight'])
|
102 |
+
method = res['method']
|
103 |
+
username = res['username']
|
104 |
+
|
105 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
106 |
+
|
107 |
+
contraVisChangeIndices = getContraVisChangeIndices(context, curr_iteration, last_iteration, method)
|
108 |
+
print(len(contraVisChangeIndices))
|
109 |
+
return make_response(jsonify({
|
110 |
+
"contraVisChangeIndices": contraVisChangeIndices
|
111 |
+
}), 200)
|
112 |
+
|
113 |
+
|
114 |
+
@app.route('/contraVisHighlightSingle', methods=["POST", "GET"])
|
115 |
+
@cross_origin()
|
116 |
+
def contravis_highlight_single():
|
117 |
+
start_time = time.time()
|
118 |
+
|
119 |
+
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
|
125 |
+
res = request.get_json()
|
126 |
+
CONTENT_PATH = os.path.normpath(res['path'])
|
127 |
+
VIS_METHOD = res['vis_method']
|
128 |
+
SETTING = res["setting"]
|
129 |
+
curr_iteration = int(res['iterationLeft'])
|
130 |
+
last_iteration = int(res['iterationRight'])
|
131 |
+
method = res['method']
|
132 |
+
left_selected = res['selectedPointLeft']
|
133 |
+
right_selected = res['selectedPointRight']
|
134 |
+
username = res['username']
|
135 |
+
|
136 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
137 |
+
|
138 |
+
contraVisChangeIndicesLeft, contraVisChangeIndicesRight, contraVisChangeIndicesLeftLeft, contraVisChangeIndicesLeftRight, contraVisChangeIndicesRightLeft, contraVisChangeIndicesRightRight = getContraVisChangeIndicesSingle(context, curr_iteration, last_iteration, method, left_selected, right_selected)
|
139 |
+
|
140 |
+
end_time = time.time()
|
141 |
+
elapsed_time = end_time - start_time
|
142 |
+
print(elapsed_time)
|
143 |
+
return make_response(jsonify({
|
144 |
+
"contraVisChangeIndicesLeft": contraVisChangeIndicesLeft,
|
145 |
+
"contraVisChangeIndicesRight": contraVisChangeIndicesRight,
|
146 |
+
"contraVisChangeIndicesLeftLeft": contraVisChangeIndicesLeftLeft,
|
147 |
+
"contraVisChangeIndicesLeftRight": contraVisChangeIndicesLeftRight,
|
148 |
+
"contraVisChangeIndicesRightLeft": contraVisChangeIndicesRightLeft,
|
149 |
+
"contraVisChangeIndicesRightRight": contraVisChangeIndicesRightRight
|
150 |
+
}), 200)
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
@app.route('/highlightConfChange', methods=["POST", "GET"])
|
155 |
+
@cross_origin()
|
156 |
+
def highlight_conf_change():
|
157 |
+
res = request.get_json()
|
158 |
+
CONTENT_PATH = os.path.normpath(res['path'])
|
159 |
+
VIS_METHOD = res['vis_method']
|
160 |
+
SETTING = res["setting"]
|
161 |
+
curr_iteration = int(res['iteration'])
|
162 |
+
last_iteration = int(res['last_iteration'])
|
163 |
+
confChangeInput = float(res['confChangeInput'])
|
164 |
+
print(confChangeInput)
|
165 |
+
username = res['username']
|
166 |
+
|
167 |
+
# sys.path.append(CONTENT_PATH)
|
168 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
169 |
+
|
170 |
+
confChangeIndices = getConfChangeIndices(context, curr_iteration, last_iteration, confChangeInput)
|
171 |
+
print(confChangeIndices)
|
172 |
+
# sys.path.remove(CONTENT_PATH)
|
173 |
+
# add_line(API_result_path,['TT',username])
|
174 |
+
return make_response(jsonify({
|
175 |
+
"confChangeIndices": confChangeIndices.tolist()
|
176 |
+
}), 200)
|
177 |
+
|
178 |
+
@app.route('/query', methods=["POST"])
|
179 |
+
@cross_origin()
|
180 |
+
def filter():
|
181 |
+
start_time = time.time()
|
182 |
+
res = request.get_json()
|
183 |
+
CONTENT_PATH = os.path.normpath(res['content_path'])
|
184 |
+
VIS_METHOD = res['vis_method']
|
185 |
+
SETTING = res["setting"]
|
186 |
+
|
187 |
+
iteration = int(res['iteration'])
|
188 |
+
predicates = res["predicates"]
|
189 |
+
username = res['username']
|
190 |
+
|
191 |
+
sys.path.append(CONTENT_PATH)
|
192 |
+
context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
|
193 |
+
# TODO: fix when active learning
|
194 |
+
EPOCH = (iteration-1)*context.strategy.data_provider.p + context.strategy.data_provider.s
|
195 |
+
|
196 |
+
training_data_number = context.strategy.config["TRAINING"]["train_num"]
|
197 |
+
testing_data_number = context.strategy.config["TRAINING"]["test_num"]
|
198 |
+
|
199 |
+
current_index = context.get_epoch_index(EPOCH)
|
200 |
+
selected_points = np.arange(training_data_number)[current_index]
|
201 |
+
selected_points = np.concatenate((selected_points, np.arange(training_data_number, training_data_number + testing_data_number, 1)), axis=0)
|
202 |
+
# selected_points = np.arange(training_data_number + testing_data_number)
|
203 |
+
for key in predicates.keys():
|
204 |
+
if key == "label":
|
205 |
+
tmp = np.array(context.filter_label(predicates[key], int(EPOCH)))
|
206 |
+
elif key == "type":
|
207 |
+
tmp = np.array(context.filter_type(predicates[key], int(EPOCH)))
|
208 |
+
elif key == "confidence":
|
209 |
+
tmp = np.array(context.filter_conf(predicates[key][0],predicates[key][1],int(EPOCH)))
|
210 |
+
else:
|
211 |
+
tmp = np.arange(training_data_number + testing_data_number)
|
212 |
+
selected_points = np.intersect1d(selected_points, tmp)
|
213 |
+
sys.path.remove(CONTENT_PATH)
|
214 |
+
add_line(API_result_path,['SQ',username])
|
215 |
+
end_time = time.time()
|
216 |
+
elapsed_time = end_time - start_time
|
217 |
+
print("query", elapsed_time)
|
218 |
+
return make_response(jsonify({"selectedPoints": selected_points.tolist()}), 200)
|
219 |
+
|
220 |
+
|
221 |
+
# base64
|
222 |
+
@app.route('/spriteImage', methods=["POST","GET"])
|
223 |
+
@cross_origin()
|
224 |
+
def sprite_image():
|
225 |
+
path = request.args.get("path")
|
226 |
+
index = request.args.get("index")
|
227 |
+
username = request.args.get("username")
|
228 |
+
|
229 |
+
CONTENT_PATH = os.path.normpath(path)
|
230 |
+
print('index', index)
|
231 |
+
idx = int(index)
|
232 |
+
pic_save_dir_path = os.path.join(CONTENT_PATH, "sprites", "{}.png".format(idx))
|
233 |
+
img_stream = ''
|
234 |
+
with open(pic_save_dir_path, 'rb') as img_f:
|
235 |
+
img_stream = img_f.read()
|
236 |
+
img_stream = base64.b64encode(img_stream).decode()
|
237 |
+
add_line(API_result_path,['SI',username])
|
238 |
+
return make_response(jsonify({"imgUrl":'data:image/png;base64,' + img_stream}), 200)
|
239 |
+
|
240 |
+
@app.route('/spriteText', methods=["POST","GET"])
|
241 |
+
@cross_origin()
|
242 |
+
def sprite_text():
|
243 |
+
path = request.args.get("path")
|
244 |
+
index = request.args.get("index")
|
245 |
+
username = request.args.get("username")
|
246 |
+
iteration = request.args.get("iteration")
|
247 |
+
|
248 |
+
# Adjust font path as needed. Use a path to a .ttf file on your system, or remove the 'truetype' part to use a default font.
|
249 |
+
# Load font - ensure 'arial.ttf' is available at this path or use a default font
|
250 |
+
# try:
|
251 |
+
# font = ImageFont.truetype("arial.ttf", 15)
|
252 |
+
# except IOError:
|
253 |
+
# font = ImageFont.load_default()
|
254 |
+
|
255 |
+
# # Calculate image size dynamically based on text length
|
256 |
+
# text_width, text_height = font.getsize(text)
|
257 |
+
# image_size = (text_width, text_height) # Add some padding
|
258 |
+
|
259 |
+
# # Create an image
|
260 |
+
# background_color = "white"
|
261 |
+
# font_color = "black"
|
262 |
+
# image = Image.new("RGB", image_size, background_color)
|
263 |
+
# draw = ImageDraw.Draw(image)
|
264 |
+
# draw.text((1, 1), text, fill=font_color, font=font) # Start drawing the text from a small margin
|
265 |
+
|
266 |
+
# # Save the image to a BytesIO object
|
267 |
+
# img_io = io.BytesIO()
|
268 |
+
# image.save(img_io, 'PNG')
|
269 |
+
# img_io.seek(0)
|
270 |
+
# Assuming you have a function to get sprite texts
|
271 |
+
|
272 |
+
|
273 |
+
|
274 |
+
# sprite_texts = get_sprite_texts(CONTENT_PATH, idx)
|
275 |
+
|
276 |
+
# # Include both the image and texts in the response
|
277 |
+
# response_data = {
|
278 |
+
# "texts": sprite_texts
|
279 |
+
# }
|
280 |
+
|
281 |
+
# return make_response(jsonify(response_data), 200)
|
282 |
+
|
283 |
+
|
284 |
+
CONTENT_PATH = os.path.normpath(path)
|
285 |
+
|
286 |
+
idx = int(index)
|
287 |
+
start = time.time()
|
288 |
+
# text_save_dir_path = os.path.join(CONTENT_PATH, f"/Model/Epoch_{iteration}/labels", "text_{}.txt".format(idx))
|
289 |
+
text_save_dir_path = os.path.join(CONTENT_PATH, f"Model/Epoch_{iteration}/labels", f"text_{idx}.txt")
|
290 |
+
if os.path.exists(text_save_dir_path):
|
291 |
+
with open(text_save_dir_path, 'r') as text_f:
|
292 |
+
# Read the contents of the file and store it in sprite_texts
|
293 |
+
sprite_texts = text_f.read()
|
294 |
+
else:
|
295 |
+
print("File does not exist:", text_save_dir_path)
|
296 |
+
|
297 |
+
|
298 |
+
print(sprite_texts)
|
299 |
+
response_data = {
|
300 |
+
"texts": sprite_texts
|
301 |
+
}
|
302 |
+
end = time.time()
|
303 |
+
print("processTime", end-start)
|
304 |
+
return make_response(jsonify(response_data), 200)
|
305 |
+
# img_stream = ''
|
306 |
+
# with open(text_save_dir_path, 'rb') as img_f:
|
307 |
+
# img_stream = img_f.read()
|
308 |
+
# img_stream = base64.b64encode(img_stream).decode()
|
309 |
+
# img_stream = base64.b64encode(img_io.getvalue()).decode()
|
310 |
+
|
311 |
+
# Return the base64-encoded image as JSON
|
312 |
+
# return make_response(jsonify({"imgUrl": 'data:image/png;base64,' + img_stream}), 200)
|
313 |
+
|
314 |
+
# @app.route('/spriteList', methods=["POST"])
|
315 |
+
# @cross_origin()
|
316 |
+
# def sprite_list_image():
|
317 |
+
# data = request.get_json()
|
318 |
+
# indices = data["index"]
|
319 |
+
# path = data["path"]
|
320 |
+
|
321 |
+
# CONTENT_PATH = os.path.normpath(path)
|
322 |
+
# length = len(indices)
|
323 |
+
# urlList = {}
|
324 |
+
# start_time = time.time()
|
325 |
+
# for i in range(length):
|
326 |
+
# idx = indices[i]
|
327 |
+
# pic_save_dir_path = os.path.join(CONTENT_PATH, "sprites", "{}.png".format(idx))
|
328 |
+
# img_stream = ''
|
329 |
+
# with open(pic_save_dir_path, 'rb') as img_f:
|
330 |
+
# img_stream = img_f.read()
|
331 |
+
# img_stream = base64.b64encode(img_stream).decode()
|
332 |
+
# urlList[idx] = 'data:image/png;base64,' + img_stream
|
333 |
+
# # urlList.append('data:image/png;base64,' + img_stream)
|
334 |
+
|
335 |
+
# end_time = time.time()
|
336 |
+
# elapsed_time = end_time - start_time
|
337 |
+
# print("Spritelist", elapsed_time)
|
338 |
+
# return make_response(jsonify({"urlList":urlList}), 200)
|
339 |
+
@app.route('/spriteList', methods=["POST"])
|
340 |
+
@cross_origin()
|
341 |
+
def sprite_list_image():
|
342 |
+
data = request.get_json()
|
343 |
+
indices = data["index"]
|
344 |
+
path = data["path"]
|
345 |
+
|
346 |
+
CONTENT_PATH = os.path.normpath(path)
|
347 |
+
length = len(indices)
|
348 |
+
urlList = {}
|
349 |
+
start_time = time.time()
|
350 |
+
for i in range(length):
|
351 |
+
idx = indices[i]
|
352 |
+
pic_save_dir_path = os.path.join(CONTENT_PATH, "sprites", "{}.png".format(idx))
|
353 |
+
img_stream = ''
|
354 |
+
with open(pic_save_dir_path, 'rb') as img_f:
|
355 |
+
img_stream = img_f.read()
|
356 |
+
img_stream = base64.b64encode(img_stream).decode()
|
357 |
+
urlList[idx] = 'data:image/png;base64,' + img_stream
|
358 |
+
# urlList.append('data:image/png;base64,' + img_stream)
|
359 |
+
|
360 |
+
end_time = time.time()
|
361 |
+
elapsed_time = end_time - start_time
|
362 |
+
print("Spritelist", elapsed_time)
|
363 |
+
return make_response(jsonify({"urlList":urlList}), 200)
|
364 |
+
|
365 |
+
@app.route('/al_query', methods=["POST"])
@cross_origin()
def al_query():
    data = request.get_json()
    CONTENT_PATH = os.path.normpath(data['content_path'])
    VIS_METHOD = data['vis_method']
    SETTING = data["setting"]

    # TODO fix iteration, align with frontend
    iteration = data["iteration"]
    strategy = data["strategy"]
    budget = int(data["budget"])
    acc_idxs = data["accIndices"]
    rej_idxs = data["rejIndices"]
    user_name = data["username"]
    isRecommend = data["isRecommend"]

    sys.path.append(CONTENT_PATH)
    context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING, dense=True)
    # TODO add new sampling rule
    indices, labels, scores = context.al_query(iteration, budget, strategy, np.array(acc_idxs).astype(np.int64), np.array(rej_idxs).astype(np.int64))

    # rank the suggestions by score, highest first
    sort_i = np.argsort(-scores)
    indices = indices[sort_i]
    labels = labels[sort_i]
    scores = scores[sort_i]

    sys.path.remove(CONTENT_PATH)
    if not isRecommend:
        add_line(API_result_path, ['Feedback', user_name])
    else:
        add_line(API_result_path, ['Recommend', user_name])
    return make_response(jsonify({"selectedPoints": indices.tolist(), "scores": scores.tolist(), "suggestLabels": labels.tolist()}), 200)

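For reference, a sketch of the JSON body this route reads; every concrete value below is a placeholder inferred from the field accesses above, and the strategy name is hypothetical:

payload = {
    "content_path": "/path/to/content",  # directory containing config.json
    "vis_method": "DVI",
    "setting": "active learning",
    "iteration": 1,
    "strategy": "LeastConfidence",       # hypothetical AL strategy name
    "budget": 50,
    "accIndices": [],
    "rejIndices": [],
    "username": "admin",
    "isRecommend": False,
}
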
@app.route('/anomaly_query', methods=["POST"])
@cross_origin()
def anomaly_query():
    data = request.get_json()
    CONTENT_PATH = os.path.normpath(data['content_path'])
    VIS_METHOD = data['vis_method']
    SETTING = data["setting"]

    budget = int(data["budget"])
    strategy = data["strategy"]
    acc_idxs = data["accIndices"]
    rej_idxs = data["rejIndices"]
    user_name = data["username"]
    isRecommend = data["isRecommend"]

    sys.path.append(CONTENT_PATH)
    context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)

    context.save_acc_and_rej(acc_idxs, rej_idxs, user_name)
    indices, scores, labels = context.suggest_abnormal(strategy, np.array(acc_idxs).astype(np.int64), np.array(rej_idxs).astype(np.int64), budget)
    clean_list, _ = context.suggest_normal(strategy, np.array(acc_idxs).astype(np.int64), np.array(rej_idxs).astype(np.int64), 1)

    sort_i = np.argsort(-scores)
    indices = indices[sort_i]
    labels = labels[sort_i]
    scores = scores[sort_i]

    sys.path.remove(CONTENT_PATH)
    if not isRecommend:
        add_line(API_result_path, ['Feedback', user_name])
    else:
        add_line(API_result_path, ['Recommend', user_name])
    return make_response(jsonify({"selectedPoints": indices.tolist(), "scores": scores.tolist(), "suggestLabels": labels.tolist(), "cleanList": clean_list.tolist()}), 200)

@app.route('/al_train', methods=["POST"])
@cross_origin()
def al_train():
    data = request.get_json()
    CONTENT_PATH = os.path.normpath(data['content_path'])
    VIS_METHOD = data['vis_method']
    SETTING = data["setting"]

    acc_idxs = data["accIndices"]
    rej_idxs = data["rejIndices"]
    iteration = data["iteration"]
    user_name = data["username"]

    sys.path.append(CONTENT_PATH)
    # by default al_train is the light version: we only save the last epoch

    context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)
    context.save_acc_and_rej(iteration, acc_idxs, rej_idxs, user_name)
    context.al_train(iteration, acc_idxs)
    NEW_ITERATION = context.get_max_iter()
    context.vis_train(NEW_ITERATION, iteration)

    # update iteration projection
    embedding_2d, grid, decision_view, label_name_dict, label_color_list, label_list, _, training_data_index, \
        testing_data_index, eval_new, prediction_list, selected_points, properties = update_epoch_projection(context, NEW_ITERATION, dict())

    # rewrite iteration_structure.json =========
    res_json_path = os.path.join(CONTENT_PATH, "iteration_structure.json")
    with open(res_json_path, encoding='utf8') as fp:
        json_data = json.load(fp)

    json_data.append({'value': NEW_ITERATION, 'name': 'iteration', 'pid': iteration})
    print('json_data', json_data)
    with open(res_json_path, 'w') as r:
        json.dump(json_data, r)
    # rewrite iteration_structure.json =========

    del config  # assumes a module-level `config` object
    gc.collect()

    sys.path.remove(CONTENT_PATH)

    add_line(API_result_path, ['al_train', user_name])
    return make_response(jsonify({'result': embedding_2d, 'grid_index': grid, 'grid_color': 'data:image/png;base64,' + decision_view,
                                  'label_name_dict': label_name_dict,
                                  'label_color_list': label_color_list, 'label_list': label_list,
                                  'maximum_iteration': NEW_ITERATION, 'training_data': training_data_index,
                                  'testing_data': testing_data_index, 'evaluation': eval_new,
                                  'prediction_list': prediction_list,
                                  "selectedPoints": selected_points.tolist(),
                                  "properties": properties.tolist()}), 200)

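The iteration_structure.json file appended to above is a flat list of nodes linked through pid; a minimal illustrative instance (the root node's pid value is an assumption) would be:

# Illustrative contents of iteration_structure.json; clear_cache() below keeps
# only the nodes whose "value" is below 3.
iteration_structure = [
    {"value": 1, "name": "iteration", "pid": ""},  # assumed root marker
    {"value": 2, "name": "iteration", "pid": 1},   # child created by al_train
]
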
def clear_cache(con_paths):
    for CONTENT_PATH in con_paths.values():
        ac_flag = False
        target_path = os.path.join(CONTENT_PATH, "Model")
        dir_list = os.listdir(target_path)
        for dir_name in dir_list:
            if "Iteration_" in dir_name:
                ac_flag = True
                i = int(dir_name.replace("Iteration_", ""))
                if i > 2:
                    shutil.rmtree(os.path.join(target_path, dir_name))
        if ac_flag:
            iter_structure_path = os.path.join(CONTENT_PATH, "iteration_structure.json")
            with open(iter_structure_path, "r") as f:
                i_s = json.load(f)
            new_is = list()
            for item in i_s:
                value = item["value"]
                if value < 3:
                    new_is.append(item)
            with open(iter_structure_path, "w") as f:
                json.dump(new_is, f)
            print("Successfully removed cache data!")


@app.route('/login', methods=["POST"])
@cross_origin()
def login():
    data = request.get_json()
    # username = data["username"]
    # password = data["password"]
    content_path = data["content_path"]
    # clear_cache(con_paths)

    # TODO verify username and password
    return make_response(jsonify({"normal_content_path": content_path, "unormaly_content_path": content_path}), 200)

@app.route('/boundingbox_record', methods=["POST"])
@cross_origin()
def record_bb():
    data = request.get_json()
    username = data['username']
    add_line(API_result_path, ['boundingbox', username])
    return make_response(jsonify({}), 200)

@app.route('/all_result_list', methods=["POST"])
@cross_origin()
def get_res():
    data = request.get_json()
    CONTENT_PATH = os.path.normpath(data['content_path'])
    VIS_METHOD = data['vis_method']
    SETTING = data["setting"]
    username = data["username"]

    predicates = dict()  # placeholder

    results = dict()
    imglist = dict()
    gridlist = dict()

    sys.path.append(CONTENT_PATH)
    context = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)

    EPOCH_START = context.strategy.config["EPOCH_START"]
    EPOCH_PERIOD = context.strategy.config["EPOCH_PERIOD"]
    EPOCH_END = context.strategy.config["EPOCH_END"]

    epoch_num = (EPOCH_END - EPOCH_START) // EPOCH_PERIOD + 1

    for i in range(1, epoch_num + 1, 1):
        EPOCH = (i - 1) * EPOCH_PERIOD + EPOCH_START

        trustvis = initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING)

        # detect whether we have queried before
        fname = "Epoch" if trustvis.data_provider.mode == "normal" or trustvis.data_provider.mode == "abnormal" else "Iteration"
        checkpoint_path = context.strategy.data_provider.checkpoint_path(EPOCH)
        bgimg_path = os.path.join(checkpoint_path, "bgimg.png")
        embedding_path = os.path.join(checkpoint_path, "embedding.npy")
        grid_path = os.path.join(checkpoint_path, "grid.pkl")
        if os.path.exists(bgimg_path) and os.path.exists(embedding_path) and os.path.exists(grid_path):
            path = os.path.join(trustvis.data_provider.model_path, "{}_{}".format(fname, EPOCH))
            result_path = os.path.join(path, "embedding.npy")
            results[str(i)] = np.load(result_path).tolist()
            with open(os.path.join(path, "grid.pkl"), "rb") as f:
                grid = pickle.load(f)
            gridlist[str(i)] = grid
        else:
            embedding_2d, grid, _, _, _, _, _, _, _, _, _, _, _ = update_epoch_projection(trustvis, EPOCH, predicates)
            results[str(i)] = embedding_2d
            gridlist[str(i)] = grid
        # read the background image
        with open(bgimg_path, 'rb') as img_f:
            img_stream = img_f.read()
            img_stream = base64.b64encode(img_stream).decode()
        imglist[str(i)] = 'data:image/png;base64,' + img_stream
        # imglist[str(i)] = "http://{}{}".format(ip_adress, bgimg_path)
    sys.path.remove(CONTENT_PATH)

    del config  # assumes a module-level `config` object
    gc.collect()

    add_line(API_result_path, ['animation', username])
    return make_response(jsonify({"results": results, "bgimgList": imglist, "grid": gridlist}), 200)

# NOTE: the route keeps the original (misspelled) path, since the frontend targets it
@app.route('/get_itertaion_structure', methods=["POST", "GET"])
@cross_origin()
def get_tree():
    CONTENT_PATH = request.args.get("path")
    res_json_path = os.path.join(CONTENT_PATH, "iteration_structure.json")
    with open(res_json_path, encoding='utf8') as fp:
        json_data = json.load(fp)

    return make_response(jsonify({"structure": json_data}), 200)

def check_port_inuse(port, host):
    s = None  # ensure `s` is defined even if socket creation fails
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(1)
        s.connect((host, port))
        return True
    except socket.error:
        return False
    finally:
        if s:
            s.close()

if __name__ == "__main__":
    import socket
    hostname = socket.gethostname()
    ip_address = socket.gethostbyname(hostname)
    port = 5000
    while check_port_inuse(port, ip_address):
        port = port + 1
    app.run(host=ip_address, port=int(port))
saved_models/codesearch_simp/server/utils.py
ADDED
@@ -0,0 +1,475 @@
import os
import json
import time
import csv
import numpy as np
import sys
import pickle
import base64
from scipy.special import softmax
vis_path = ".."
sys.path.append(vis_path)
from context import VisContext, ActiveLearningContext, AnormalyContext
from strategy import DeepDebugger, TimeVis, tfDeepVisualInsight, DVIAL, tfDVIDenseAL, TimeVisDenseAL, TrustActiveLearningDVI, DeepVisualInsight, TrustProxyDVI
from singleVis.eval.evaluate import evaluate_isAlign, evaluate_isNearestNeighbour, evaluate_isAlign_single, evaluate_isNearestNeighbour_single
"""Interface align"""

def initialize_strategy(CONTENT_PATH, VIS_METHOD, SETTING, dense=False):
    # initialize the strategy (visualization method)
    with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
        conf = json.load(f)

    config = conf[VIS_METHOD]

    # TODO support TimeVis; currently only DVI is supported
    # remove unnecessary parts
    if SETTING == "normal" or SETTING == "abnormal":
        if VIS_METHOD == "TrustVisActiveLearning":
            strategy = TrustActiveLearningDVI(CONTENT_PATH, config)
        elif VIS_METHOD == "TrustVisProxy":
            strategy = TrustProxyDVI(CONTENT_PATH, config)
        elif VIS_METHOD == "DVI":
            strategy = DeepVisualInsight(CONTENT_PATH, config)
        elif VIS_METHOD == "TimeVis":
            strategy = TimeVis(CONTENT_PATH, config)
        elif VIS_METHOD == "DeepDebugger":
            strategy = DeepDebugger(CONTENT_PATH, config)
        else:
            raise NotImplementedError
    elif SETTING == "active learning":
        if dense:
            if VIS_METHOD == "DVI":
                strategy = tfDVIDenseAL(CONTENT_PATH, config)
            elif VIS_METHOD == "TimeVis":
                strategy = TimeVisDenseAL(CONTENT_PATH, config)
            else:
                raise NotImplementedError
        else:
            strategy = DVIAL(CONTENT_PATH, config)

    else:
        raise NotImplementedError

    return strategy

# TODO remove unnecessary parts
def initialize_context(strategy, setting):
    if setting == "normal":
        context = VisContext(strategy)
    elif setting == "active learning":
        context = ActiveLearningContext(strategy)
    elif setting == "abnormal":
        context = AnormalyContext(strategy)
    else:
        raise NotImplementedError
    return context

def initialize_backend(CONTENT_PATH, VIS_METHOD, SETTING, dense=False):
    """Initialize the backend for visualization.

    Args:
        CONTENT_PATH (str): the directory of the training process
        VIS_METHOD (str): visualization strategy,
            e.g. "DVI", "TimeVis", "DeepDebugger", ...
        SETTING (str): context,
            one of "normal", "active learning", "dense al", "abnormal"

    Raises:
        NotImplementedError: if the method/setting combination is not supported

    Returns:
        backend: a context with a specific strategy
    """
    strategy = initialize_strategy(CONTENT_PATH, VIS_METHOD, SETTING, dense)
    context = initialize_context(strategy=strategy, setting=SETTING)
    return context

def get_train_test_data(context, EPOCH):
    train_data = context.train_representation_data(EPOCH)
    test_data = context.test_representation_data(EPOCH)
    all_data = np.concatenate((train_data, test_data), axis=0)
    return all_data

def get_train_test_label(context, EPOCH):
    train_labels = context.train_labels(EPOCH)
    test_labels = context.test_labels(EPOCH)
    labels = np.concatenate((train_labels, test_labels), axis=0).astype(int)
    return labels


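A minimal usage sketch tying the helpers above together; the content path and epoch are placeholder assumptions, and the directory must already contain a config.json plus saved representations:

# Sketch: build a context and fetch the concatenated train/test representations.
context = initialize_backend("/path/to/content", "DVI", "normal")
all_data = get_train_test_data(context, 1)   # epoch 1, assumed to exist
labels = get_train_test_label(context, 1)
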
# def get_strategy_by_setting(CONTENT_PATH, config, VIS_METHOD, SETTING, dense=False):
#     if SETTING == "normal" or SETTING == "abnormal":
#         if VIS_METHOD == "DVI":
#             strategy = tfDeepVisualInsight(CONTENT_PATH, config)
#         elif VIS_METHOD == "TimeVis":
#             strategy = TimeVis(CONTENT_PATH, config)
#         elif VIS_METHOD == "DeepDebugger":
#             strategy = DeepDebugger(CONTENT_PATH, config)
#         else:
#             raise NotImplementedError
#     elif SETTING == "active learning":
#         if dense:
#             if VIS_METHOD == "DVI":
#                 strategy = tfDVIDenseAL(CONTENT_PATH, config)
#             elif VIS_METHOD == "TimeVis":
#                 strategy = TimeVisDenseAL(CONTENT_PATH, config)
#             else:
#                 raise NotImplementedError
#         else:
#             strategy = DVIAL(CONTENT_PATH, config)
#     else:
#         raise NotImplementedError
#     return strategy

# def update_embeddings(new_strategy, context, EPOCH, all_data, is_focus):
#     embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "embedding.npy")
#     if os.path.exists(embedding_path):
#         original_embedding_2d = np.load(embedding_path)
#     dd = TimeVis(context.contentpath, new_conf)
#     dd._preprocess()
#     dd._train()
#     embedding_2d = dd.projector.batch_project(EPOCH, all_data)
#     return embedding_2d

# def find_and_add_nearest_neighbors(data, subset_indices, num_neighbors=10):
#     dimension = len(data[0])  # Assuming all data points have the same dimension
#     t = AnnoyIndex(dimension, 'euclidean')  # 'euclidean' distance metric; you can use 'angular' as well
#     # Build the index with the entire data
#     for i, vector in enumerate(data):
#         t.add_item(i, vector)
#     t.build(10)  # Number of trees. More trees gives higher precision.
#     # Use a set for faster look-up and ensuring no duplicates
#     subset_indices_set = set(subset_indices)
#     for idx in subset_indices:
#         nearest_neighbors = t.get_nns_by_item(idx, num_neighbors)
#         # Use set union operation to merge indices without duplicates
#         subset_indices_set = subset_indices_set.union(nearest_neighbors)
#     # Convert set back to list
#     return list(subset_indices_set)

# def get_expanded_subset(context, EPOCH, subset_indices):
#     all_data = get_train_test_data(context, EPOCH)
#     expanded_subset = find_and_add_nearest_neighbors(all_data, subset_indices)
#     return expanded_subset

# def update_vis_error_points(new_strategy, context, EPOCH, is_focus):
#     embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "embedding.npy")
#     if os.path.exists(embedding_path):
#         original_embedding_2d = np.load(embedding_path)
#     new_strategy._train()
#     new_strategy.projector.batch_project
#     embedding_2d = dd.projector.batch_project(EPOCH, all_data)
#     update_embeddings(strategy, context, EPOCH, True)


def update_epoch_projection(context, EPOCH, predicates, isContraVis):
    # TODO consider active learning setting

    train_data = context.train_representation_data(EPOCH)
    test_data = context.test_representation_data(EPOCH)
    all_data = np.concatenate((train_data, test_data), axis=0)
    print(len(all_data))

    train_labels = context.train_labels(EPOCH)
    # test_labels = context.test_labels(EPOCH)
    # labels = np.concatenate((train_labels, test_labels), axis=0).astype(int)
    labels = train_labels

    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "embedding.npy")
    if os.path.exists(embedding_path):
        embedding_2d = np.load(embedding_path)
    else:
        embedding_2d = context.strategy.projector.batch_project(EPOCH, all_data)
        np.save(embedding_path, embedding_2d)

    training_data_number = context.strategy.config["TRAINING"]["train_num"]
    testing_data_number = context.strategy.config["TRAINING"]["test_num"]
    training_data_index = list(range(training_data_number))
    testing_data_index = list(range(training_data_number, training_data_number + testing_data_number))

    # return the background image
    # read the cache if it exists
    bgimg_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "bgimg.png")
    scale_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "scale.npy")
    # grid_path = os.path.join(context.strategy.data_provider.checkpoint_path(EPOCH), "grid.pkl")
    if os.path.exists(bgimg_path) and os.path.exists(scale_path):
        # with open(os.path.join(grid_path), "rb") as f:
        #     grid = pickle.load(f)
        with open(bgimg_path, 'rb') as img_f:
            img_stream = img_f.read()
        b_fig = base64.b64encode(img_stream).decode()
        grid = np.load(scale_path)
    else:
        x_min, y_min, x_max, y_max, b_fig = context.strategy.vis.get_background(EPOCH, context.strategy.config["VISUALIZATION"]["RESOLUTION"])
        grid = [x_min, y_min, x_max, y_max]
        # formatting
        grid = [float(i) for i in grid]
        b_fig = str(b_fig, encoding='utf-8')
        # save results, grid and decision_view
        # with open(grid_path, "wb") as f:
        #     pickle.dump(grid, f)
        np.save(embedding_path, embedding_2d)

    # TODO fix its structure
    eval_new = dict()
    file_name = context.strategy.config["VISUALIZATION"]["EVALUATION_NAME"]
    save_eval_dir = os.path.join(context.strategy.data_provider.model_path, file_name + ".json")
    if os.path.exists(save_eval_dir):
        evaluation = context.strategy.evaluator.get_eval(file_name=file_name)
        eval_new["train_acc"] = evaluation["train_acc"][str(EPOCH)]
        eval_new["test_acc"] = evaluation["test_acc"][str(EPOCH)]
    else:
        eval_new["train_acc"] = 0
        eval_new["test_acc"] = 0

    color = context.strategy.vis.get_standard_classes_color() * 255
    color = color.astype(int)

    CLASSES = np.array(context.strategy.config["CLASSES"])
    # label_color_list = [0] * len(labels)
    label_color_list = color[labels].tolist()
    label_list = CLASSES[labels].tolist()
    label_name_dict = dict(enumerate(CLASSES))

    prediction_list = []
    # if (isContraVis == 'false'):
    #     prediction = context.strategy.data_provider.get_pred(EPOCH, all_data).argmax(1)
    #     for i in range(len(prediction)):
    #         prediction_list.append(CLASSES[prediction[i]])
    for i in range(len(train_data)):
        prediction_list.append("0")

    EPOCH_START = context.strategy.config["EPOCH_START"]
    EPOCH_PERIOD = context.strategy.config["EPOCH_PERIOD"]
    EPOCH_END = context.strategy.config["EPOCH_END"]
    max_iter = (EPOCH_END - EPOCH_START) // EPOCH_PERIOD + 1
    # max_iter = context.get_max_iter()

    # current_index = timevis.get_epoch_index(EPOCH)
    # selected_points = np.arange(training_data_number + testing_data_number)[current_index]
    selected_points = np.arange(training_data_number + testing_data_number)
    for key in predicates.keys():
        if key == "label":
            tmp = np.array(context.filter_label(predicates[key]))
        elif key == "type":
            tmp = np.array(context.filter_type(predicates[key], int(EPOCH)))
        else:
            tmp = np.arange(training_data_number + testing_data_number)
        selected_points = np.intersect1d(selected_points, tmp)

    properties = np.concatenate((np.zeros(training_data_number, dtype=np.int16), 2*np.ones(testing_data_number, dtype=np.int16)), axis=0)
    lb = context.get_epoch_index(EPOCH)
    ulb = np.setdiff1d(training_data_index, lb)
    properties[ulb] = 1

    highlightedPointIndices = []

    if (isContraVis == 'false'):
        high_pred = context.strategy.data_provider.get_pred(EPOCH, all_data).argmax(1)
        inv_high_dim_data = context.strategy.projector.batch_inverse(EPOCH, embedding_2d)
        inv_high_pred = context.strategy.data_provider.get_pred(EPOCH, inv_high_dim_data).argmax(1)
        highlightedPointIndices = np.where(high_pred != inv_high_pred)[0]

    print("EMBEDDINGLEN", len(embedding_2d))
    return embedding_2d.tolist(), grid, b_fig, label_name_dict, label_color_list, label_list, max_iter, training_data_index, testing_data_index, eval_new, prediction_list, selected_points, properties, highlightedPointIndices

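A sketch of invoking this projection routine directly, assuming a context built by initialize_backend; epoch 1 and the empty predicate filter are placeholders, and isContraVis mirrors the string flag the frontend sends:

(embedding_2d, grid, b_fig, label_name_dict, label_color_list, label_list,
 max_iter, train_idx, test_idx, eval_new, prediction_list, selected_points,
 properties, highlighted) = update_epoch_projection(context, 1, dict(), 'false')
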
def getContraVisChangeIndices(context, iterationLeft, iterationRight, method):

    predChangeIndices = []

    train_data = context.train_representation_data(iterationLeft)
    test_data = context.test_representation_data(iterationLeft)
    all_data = np.concatenate((train_data, test_data), axis=0)

    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(iterationLeft), "embedding.npy")
    if os.path.exists(embedding_path):
        embedding_2d = np.load(embedding_path)
    else:
        embedding_2d = context.strategy.projector.batch_project(iterationLeft, all_data)
        np.save(embedding_path, embedding_2d)

    last_train_data = context.train_representation_data(iterationRight)
    last_test_data = context.test_representation_data(iterationRight)
    last_all_data = np.concatenate((last_train_data, last_test_data), axis=0)

    last_embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(iterationRight), "embedding.npy")
    if os.path.exists(last_embedding_path):
        last_embedding_2d = np.load(last_embedding_path)
    else:
        last_embedding_2d = context.strategy.projector.batch_project(iterationRight, last_all_data)
        np.save(last_embedding_path, last_embedding_2d)

    if (method == "align"):
        predChangeIndices = evaluate_isAlign(embedding_2d, last_embedding_2d)
    elif (method == "nearest neighbour"):
        predChangeIndices = evaluate_isNearestNeighbour(embedding_2d, last_embedding_2d)
    elif (method == "both"):
        predChangeIndices_align = evaluate_isAlign(embedding_2d, last_embedding_2d)
        predChangeIndices_nearest = evaluate_isNearestNeighbour(embedding_2d, last_embedding_2d)
        intersection = set(predChangeIndices_align).intersection(predChangeIndices_nearest)
        predChangeIndices = list(intersection)
    else:
        print("wrong method:", method)

    return predChangeIndices

def getContraVisChangeIndicesSingle(context, iterationLeft, iterationRight, method, left_selected, right_selected):

    train_data = context.train_representation_data(iterationLeft)
    test_data = context.test_representation_data(iterationLeft)
    all_data = np.concatenate((train_data, test_data), axis=0)

    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(iterationLeft), "embedding.npy")
    if os.path.exists(embedding_path):
        embedding_2d = np.load(embedding_path)
    else:
        embedding_2d = context.strategy.projector.batch_project(iterationLeft, all_data)
        np.save(embedding_path, embedding_2d)

    last_train_data = context.train_representation_data(iterationRight)
    last_test_data = context.test_representation_data(iterationRight)
    last_all_data = np.concatenate((last_train_data, last_test_data), axis=0)

    last_embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(iterationRight), "embedding.npy")
    if os.path.exists(last_embedding_path):
        last_embedding_2d = np.load(last_embedding_path)
    else:
        last_embedding_2d = context.strategy.projector.batch_project(iterationRight, last_all_data)
        np.save(last_embedding_path, last_embedding_2d)

    predChangeIndicesLeft = []
    predChangeIndicesRight = []
    predChangeIndicesLeft_Left = []
    predChangeIndicesLeft_Right = []
    predChangeIndicesRight_Left = []
    predChangeIndicesRight_Right = []

    if (method == "align"):
        predChangeIndicesLeft, predChangeIndicesRight = evaluate_isAlign_single(embedding_2d, last_embedding_2d, left_selected, right_selected)
    elif (method == "nearest neighbour"):
        predChangeIndicesLeft_Left, predChangeIndicesLeft_Right, predChangeIndicesRight_Left, predChangeIndicesRight_Right = evaluate_isNearestNeighbour_single(embedding_2d, last_embedding_2d, left_selected, right_selected)

    return predChangeIndicesLeft, predChangeIndicesRight, predChangeIndicesLeft_Left, predChangeIndicesLeft_Right, predChangeIndicesRight_Left, predChangeIndicesRight_Right

def getCriticalChangeIndices(context, curr_iteration, last_iteration):

    train_data = context.train_representation_data(curr_iteration)
    test_data = context.test_representation_data(curr_iteration)
    all_data = np.concatenate((train_data, test_data), axis=0)

    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(curr_iteration), "embedding.npy")
    if os.path.exists(embedding_path):
        embedding_2d = np.load(embedding_path)
    else:
        embedding_2d = context.strategy.projector.batch_project(curr_iteration, all_data)
        np.save(embedding_path, embedding_2d)

    last_train_data = context.train_representation_data(last_iteration)
    last_test_data = context.test_representation_data(last_iteration)
    last_all_data = np.concatenate((last_train_data, last_test_data), axis=0)

    last_embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(last_iteration), "embedding.npy")
    if os.path.exists(last_embedding_path):
        last_embedding_2d = np.load(last_embedding_path)
    else:
        last_embedding_2d = context.strategy.projector.batch_project(last_iteration, last_all_data)
        np.save(last_embedding_path, last_embedding_2d)

    high_pred = context.strategy.data_provider.get_pred(curr_iteration, all_data).argmax(1)
    last_high_pred = context.strategy.data_provider.get_pred(last_iteration, last_all_data).argmax(1)

    predChangeIndices = np.where(high_pred != last_high_pred)[0]

    return predChangeIndices

def getConfChangeIndices(context, curr_iteration, last_iteration, confChangeInput):

    train_data = context.train_representation_data(curr_iteration)
    test_data = context.test_representation_data(curr_iteration)
    all_data = np.concatenate((train_data, test_data), axis=0)

    embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(curr_iteration), "embedding.npy")
    if os.path.exists(embedding_path):
        embedding_2d = np.load(embedding_path)
    else:
        embedding_2d = context.strategy.projector.batch_project(curr_iteration, all_data)
        np.save(embedding_path, embedding_2d)

    last_train_data = context.train_representation_data(last_iteration)
    last_test_data = context.test_representation_data(last_iteration)
    last_all_data = np.concatenate((last_train_data, last_test_data), axis=0)

    last_embedding_path = os.path.join(context.strategy.data_provider.checkpoint_path(last_iteration), "embedding.npy")
    if os.path.exists(last_embedding_path):
        last_embedding_2d = np.load(last_embedding_path)
    else:
        last_embedding_2d = context.strategy.projector.batch_project(last_iteration, last_all_data)
        np.save(last_embedding_path, last_embedding_2d)

    high_pred = context.strategy.data_provider.get_pred(curr_iteration, all_data)
    last_high_pred = context.strategy.data_provider.get_pred(last_iteration, last_all_data)

    high_conf = softmax(high_pred, axis=1)
    last_high_conf = softmax(last_high_pred, axis=1)

    # class with the highest probability in each iteration
    high_pred_class = high_conf.argmax(axis=1)
    last_high_pred_class = last_high_conf.argmax(axis=1)

    same_pred_indices = np.where(high_pred_class == last_high_pred_class)[0]
    print("same")
    print(same_pred_indices)
    # confidence difference on each sample's predicted class
    conf_diff = np.abs(high_conf[np.arange(len(high_conf)), high_pred_class] - last_high_conf[np.arange(len(last_high_conf)), last_high_pred_class])
    print("conf")
    print(conf_diff)
    significant_conf_change_indices = same_pred_indices[conf_diff[same_pred_indices] > confChangeInput]
    print("significant")
    print(significant_conf_change_indices)

    return significant_conf_change_indices

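To make the selection rule concrete, a self-contained toy run with two samples and two classes (logits invented for illustration):

import numpy as np
from scipy.special import softmax

curr = softmax(np.array([[2.0, 0.0], [0.1, 0.0]]), axis=1)  # conf ~ [0.88, 0.52]
prev = softmax(np.array([[0.5, 0.0], [0.0, 0.0]]), axis=1)  # conf ~ [0.62, 0.50]
same = np.where(curr.argmax(1) == prev.argmax(1))[0]        # both samples keep class 0
diff = np.abs(curr[np.arange(2), curr.argmax(1)] - prev[np.arange(2), prev.argmax(1)])
print(same[diff[same] > 0.1])  # -> [0]: only sample 0 changed confidence enough
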
def add_line(path, data_row):
    """Append a timestamped row to a CSV log.

    data_row: list, [API_name, username]; the current time is appended,
    so the stored row is [API_name, username, time].
    """
    now_time = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime())
    data_row.append(now_time)
    with open(path, "a+") as f:
        csv_write = csv.writer(f)
        csv_write.writerow(data_row)
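Usage is one call per logged API hit; the file name below mirrors the repository's admin_API_result.csv, though the actual value of API_result_path is defined in server.py:

# Appends a row like ['al_train', 'admin', '2024-01-01-12:00:00'].
add_line("admin_API_result.csv", ["al_train", "admin"])
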
saved_models/codesearch_simp/simplify.py
ADDED
@@ -0,0 +1,35 @@
import numpy as np
import json

# file_path = "/home/yiming/ContrastDebugger/EXP/codesearch_simp/Model/Epoch_1/train_data.npy"

# # load the ndarray data
# data = np.load(file_path)

# print(len(data))
# # # select the first 50000 entries
# # selected_data = data[:50000]

# # # save them back to the file
# # np.save(file_path, selected_data)

# idxs = [i for i in range(len(data))]

# idxs_path = "/home/yiming/ContrastDebugger/EXP/codesearch_simp/Model/Epoch_1/index.json"
# json_file = open(idxs_path, mode='w')
# json.dump(idxs, json_file, indent=4)

input_file = "/home/yiming/ContrastDebugger/EXP/codesearch_query_simp/Model/label_list.json"
output_file = "/home/yiming/ContrastDebugger/EXP/codesearch_query_simp/Model/label.txt"

# read the input file
with open(input_file, "r") as f:
    data = json.load(f)

# select the first 50000 entries
selected_data = data[:50000]

# write each entry as one line to the output file
with open(output_file, "w") as f:
    for item in selected_data:
        f.write(item + "\n")
saved_models/codesearch_simp/singleVis/SingleVisualizationModel.py
ADDED
@@ -0,0 +1,188 @@
from torch import nn


class SingleVisualizationModel(nn.Module):
    def __init__(self, input_dims, output_dims, units, hidden_layer=3):
        super(SingleVisualizationModel, self).__init__()

        self.input_dims = input_dims
        self.output_dims = output_dims
        self.units = units
        self.hidden_layer = hidden_layer
        self._init_autoencoder()

    # TODO find the best model architecture
    def _init_autoencoder(self):
        self.encoder = nn.Sequential(
            nn.Linear(self.input_dims, self.units),
            nn.ReLU(True))
        for h in range(self.hidden_layer):
            self.encoder.add_module("{}".format(2*h+2), nn.Linear(self.units, self.units))
            self.encoder.add_module("{}".format(2*h+3), nn.ReLU(True))
        self.encoder.add_module("{}".format(2*(self.hidden_layer+1)), nn.Linear(self.units, self.output_dims))

        self.decoder = nn.Sequential(
            nn.Linear(self.output_dims, self.units),
            nn.ReLU(True))
        for h in range(self.hidden_layer):
            self.decoder.add_module("{}".format(2*h+2), nn.Linear(self.units, self.units))
            self.decoder.add_module("{}".format(2*h+3), nn.ReLU(True))
        self.decoder.add_module("{}".format(2*(self.hidden_layer+1)), nn.Linear(self.units, self.input_dims))

    def forward(self, edge_to, edge_from):
        outputs = dict()
        embedding_to = self.encoder(edge_to)
        embedding_from = self.encoder(edge_from)
        recon_to = self.decoder(embedding_to)
        recon_from = self.decoder(embedding_from)

        outputs["umap"] = (embedding_to, embedding_from)
        outputs["recon"] = (recon_to, recon_from)

        return outputs

class VisModel(nn.Module):
    """Define your own visualization model by specifying its structure."""
    def __init__(self, encoder_dims, decoder_dims):
        """Define your own visualization model by specifying its structure.

        Parameters
        ----------
        encoder_dims : list of int
            the neuron number of each encoder layer;
            for example, [100, 50, 2] denotes two fully connected layers,
            with shapes (100, 50) and (50, 2)
        decoder_dims : list of int
            same as encoder_dims
        """
        super(VisModel, self).__init__()
        assert len(encoder_dims) > 1
        assert len(decoder_dims) > 1
        self.encoder_dims = encoder_dims
        self.decoder_dims = decoder_dims
        self._init_autoencoder()

    def _init_autoencoder(self):
        self.encoder = nn.Sequential()
        for i in range(0, len(self.encoder_dims)-2):
            self.encoder.add_module("{}".format(len(self.encoder)), nn.Linear(self.encoder_dims[i], self.encoder_dims[i+1]))
            self.encoder.add_module("{}".format(len(self.encoder)), nn.ReLU(True))
        self.encoder.add_module("{}".format(len(self.encoder)), nn.Linear(self.encoder_dims[-2], self.encoder_dims[-1]))

        self.decoder = nn.Sequential()
        for i in range(0, len(self.decoder_dims)-2):
            self.decoder.add_module("{}".format(len(self.decoder)), nn.Linear(self.decoder_dims[i], self.decoder_dims[i+1]))
            self.decoder.add_module("{}".format(len(self.decoder)), nn.ReLU(True))
        self.decoder.add_module("{}".format(len(self.decoder)), nn.Linear(self.decoder_dims[-2], self.decoder_dims[-1]))

    def forward(self, edge_to, edge_from):
        outputs = dict()
        embedding_to = self.encoder(edge_to)
        embedding_from = self.encoder(edge_from)
        recon_to = self.decoder(embedding_to)
        recon_from = self.decoder(embedding_from)

        outputs["umap"] = (embedding_to, embedding_from)
        outputs["recon"] = (recon_to, recon_from)

        return outputs

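A quick smoke-test sketch for VisModel; the 512-dimensional input and batch size of 8 are arbitrary assumptions:

import torch

# Encoder 512 -> 256 -> 2; decoder mirrors it back to 512.
model = VisModel(encoder_dims=[512, 256, 2], decoder_dims=[2, 256, 512])
edge_to, edge_from = torch.randn(8, 512), torch.randn(8, 512)
out = model(edge_to, edge_from)
print(out["umap"][0].shape)   # torch.Size([8, 2])
print(out["recon"][0].shape)  # torch.Size([8, 512])
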
'''
The visualization model definition class
'''
import tensorflow as tf
from tensorflow import keras

class tfModel(keras.Model):
    def __init__(self, optimizer, loss, loss_weights, encoder_dims, decoder_dims, batch_size, withoutB=True, attention=True, prev_trainable_variables=None):
        super(tfModel, self).__init__()
        self._init_autoencoder(encoder_dims, decoder_dims)
        self.optimizer = optimizer  # optimizer
        self.withoutB = withoutB
        self.attention = attention

        self.loss = loss  # dict of losses {"total", "umap", "reconstruction", "regularization"}
        self.loss_weights = loss_weights  # weights for each loss (three losses in total)

        self.prev_trainable_variables = prev_trainable_variables  # weights from the previous iteration
        self.batch_size = batch_size

    def _init_autoencoder(self, encoder_dims, decoder_dims):
        self.encoder = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(encoder_dims[0],)),
            tf.keras.layers.Flatten(),
        ])
        for i in range(1, len(encoder_dims)-1, 1):
            self.encoder.add(tf.keras.layers.Dense(units=encoder_dims[i], activation="relu"))
        self.encoder.add(tf.keras.layers.Dense(units=encoder_dims[-1]))

        self.decoder = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(decoder_dims[0],)),
        ])
        for i in range(1, len(decoder_dims)-1, 1):
            self.decoder.add(tf.keras.layers.Dense(units=decoder_dims[i], activation="relu"))
        self.decoder.add(tf.keras.layers.Dense(units=decoder_dims[-1]))
        print(self.encoder.summary())
        print(self.decoder.summary())

    def train_step(self, x):
        to_x, from_x, to_alpha, from_alpha, n_rate, weight = x[0]
        to_x = tf.cast(to_x, dtype=tf.float32)
        from_x = tf.cast(from_x, dtype=tf.float32)
        to_alpha = tf.cast(to_alpha, dtype=tf.float32)
        from_alpha = tf.cast(from_alpha, dtype=tf.float32)
        n_rate = tf.cast(n_rate, dtype=tf.float32)
        weight = tf.cast(weight, dtype=tf.float32)

        # Forward pass
        with tf.GradientTape(persistent=True) as tape:
            # parametric embedding
            embedding_to = self.encoder(to_x)  # embedding for instance 1
            embedding_from = self.encoder(from_x)  # embedding for instance 2
            embedding_to_recon = self.decoder(embedding_to)  # reconstruct instance 1
            embedding_from_recon = self.decoder(embedding_from)  # reconstruct instance 2

            # concatenate embedding1 and embedding2 to prepare for the umap loss
            embedding_to_from = tf.concat((embedding_to, embedding_from, weight),
                                          axis=1)
            # reconstruction loss
            if self.attention:
                reconstruct_loss = self.loss["reconstruction"](to_x, from_x, embedding_to_recon, embedding_from_recon, to_alpha, from_alpha)
            else:
                self.loss["reconstruction"] = tf.keras.losses.MeanSquaredError()
                reconstruct_loss = self.loss["reconstruction"](y_true=to_x, y_pred=embedding_to_recon)/2 + self.loss["reconstruction"](y_true=from_x, y_pred=embedding_from_recon)/2

            # umap loss
            umap_loss = self.loss["umap"](None, embed_to_from=embedding_to_from)  # w_(t-1), no gradient

            # compute alpha bar
            alpha_mean = tf.cast(tf.reduce_mean(tf.stop_gradient(n_rate)), dtype=tf.float32)
            # L2 norm of w_current - w from the last epoch (subject model's epoch);
            # dummy zero loss if there is no previous epoch
            if self.prev_trainable_variables is None:
                prev_trainable_variables = [tf.stop_gradient(x) for x in self.trainable_variables]
            else:
                prev_trainable_variables = self.prev_trainable_variables
            regularization_loss = self.loss["regularization"](w_prev=prev_trainable_variables, w_current=self.trainable_variables, to_alpha=alpha_mean)

            # aggregate loss, weighted average
            loss = tf.add(tf.add(tf.math.multiply(tf.constant(self.loss_weights["reconstruction"]), reconstruct_loss),
                                 tf.math.multiply(tf.constant(self.loss_weights["umap"]), umap_loss)),
                          tf.math.multiply(tf.constant(self.loss_weights["regularization"]), regularization_loss))

        # Compute gradients
        trainable_vars = self.trainable_variables
        grads = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(grads, trainable_vars))

        return {"loss": loss, "umap": umap_loss, "reconstruction": reconstruct_loss,
                "regularization": regularization_loss}

saved_models/codesearch_simp/singleVis/__init__.py
ADDED
File without changes

saved_models/codesearch_simp/singleVis/__pycache__/SingleVisualizationModel.cpython-37.pyc
ADDED
Binary file (5.91 kB)

saved_models/codesearch_simp/singleVis/__pycache__/SingleVisualizationModel.cpython-39.pyc
ADDED
Binary file (5.93 kB)

saved_models/codesearch_simp/singleVis/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (111 Bytes)

saved_models/codesearch_simp/singleVis/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (152 Bytes)

saved_models/codesearch_simp/singleVis/__pycache__/active_sampling.cpython-37.pyc
ADDED
Binary file (860 Bytes)

saved_models/codesearch_simp/singleVis/__pycache__/backend.cpython-37.pyc
ADDED
Binary file (5.09 kB)

saved_models/codesearch_simp/singleVis/__pycache__/backend.cpython-39.pyc
ADDED
Binary file (5.12 kB)

saved_models/codesearch_simp/singleVis/__pycache__/custom_weighted_random_sampler.cpython-37.pyc
ADDED
Binary file (2.01 kB)

saved_models/codesearch_simp/singleVis/__pycache__/custom_weighted_random_sampler.cpython-39.pyc
ADDED
Binary file (1.12 kB)

saved_models/codesearch_simp/singleVis/__pycache__/data.cpython-37.pyc
ADDED
Binary file (35.7 kB)

saved_models/codesearch_simp/singleVis/__pycache__/data.cpython-39.pyc
ADDED
Binary file (32.5 kB)

saved_models/codesearch_simp/singleVis/__pycache__/edge_dataset.cpython-37.pyc
ADDED
Binary file (5.22 kB)

saved_models/codesearch_simp/singleVis/__pycache__/edge_dataset.cpython-39.pyc
ADDED
Binary file (5.15 kB)

saved_models/codesearch_simp/singleVis/__pycache__/intrinsic_dim.cpython-37.pyc
ADDED
Binary file (4.42 kB)

saved_models/codesearch_simp/singleVis/__pycache__/intrinsic_dim.cpython-39.pyc
ADDED
Binary file (4.44 kB)

saved_models/codesearch_simp/singleVis/__pycache__/jj1sk.cpython-37.pyc
ADDED
Binary file (16.5 kB)

saved_models/codesearch_simp/singleVis/__pycache__/jj51sk.cpython-37.pyc
ADDED
Binary file (16.5 kB)

saved_models/codesearch_simp/singleVis/__pycache__/jj551sk.cpython-37.pyc
ADDED
Binary file (16.5 kB)

saved_models/codesearch_simp/singleVis/__pycache__/jjsk.cpython-37.pyc
ADDED
Binary file (16.6 kB)

saved_models/codesearch_simp/singleVis/__pycache__/kcenter_greedy.cpython-37.pyc
ADDED
Binary file (5.29 kB)

saved_models/codesearch_simp/singleVis/__pycache__/kcenter_greedy.cpython-39.pyc
ADDED
Binary file (4.9 kB)

saved_models/codesearch_simp/singleVis/__pycache__/losses.cpython-37.pyc
ADDED
Binary file (12.1 kB)

saved_models/codesearch_simp/singleVis/__pycache__/losses.cpython-39.pyc
ADDED
Binary file (12.1 kB)

saved_models/codesearch_simp/singleVis/__pycache__/projector.cpython-37.pyc
ADDED
Binary file (17.6 kB)

saved_models/codesearch_simp/singleVis/__pycache__/sVis.cpython-37.pyc
ADDED
Binary file (16.6 kB)

saved_models/codesearch_simp/singleVis/__pycache__/s_Vis.cpython-37.pyc
ADDED
Binary file (16.6 kB)

saved_models/codesearch_simp/singleVis/__pycache__/segmenter.cpython-37.pyc
ADDED
Binary file (3.82 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skeVis.cpython-37.pyc
ADDED
Binary file (16.4 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skeleVis.cpython-37.pyc
ADDED
Binary file (16.6 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skele_Vis.cpython-37.pyc
ADDED
Binary file (16.4 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skele_viser.cpython-37.pyc
ADDED
Binary file (16.4 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skeletonVis.cpython-37.pyc
ADDED
Binary file (16.3 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skeletonViser.cpython-37.pyc
ADDED
Binary file (16.3 kB)

saved_models/codesearch_simp/singleVis/__pycache__/skeletonVisualizer.cpython-37.pyc
ADDED
Binary file (16.3 kB)