# Page-header residue from the hosting site (not part of the module): SalazarPevelll / be / f291f4a / raw / history blame / 4.09 kB
import numpy as np
import json
import os
from pynndescent import NNDescent
# helper function
def hausdorff_d(curr_data, prev_data):
    """Return the mean nearest-neighbour distance from ``prev_data`` to ``curr_data``.

    An approximate nearest-neighbour index (``NNDescent``, euclidean metric)
    is built over ``curr_data``; every row of ``prev_data`` is then queried
    for its single closest indexed point and the resulting distances are
    averaged.

    NOTE(review): despite the name, this is a one-directional *mean* of
    nearest-neighbour distances, not a max-based Hausdorff distance.

    Args:
        curr_data: 2-D array of points that the index is built on.
        prev_data: array of query points (presumably the same feature
            width as ``curr_data`` -- confirm against callers).

    Returns:
        The mean of the k=1 query distances.
    """
    n_points = curr_data.shape[0]

    # Size of the random projection forest: grows with sqrt(n), capped at 64.
    forest_size = min(64, 5 + int(round(n_points ** 0.5 / 20.0)))
    # Upper bound on nearest-neighbour descent rounds: log2(n), at least 5.
    descent_rounds = max(5, int(round(np.log2(n_points))))

    index = NNDescent(
        curr_data,
        n_neighbors=1,
        metric="euclidean",
        n_trees=forest_size,
        n_iters=descent_rounds,
        max_candidates=10,
        verbose=False,
    )

    # query() yields (neighbour indices, neighbour distances); only the
    # distances are needed here.
    _neighbour_ids, nearest_dists = index.query(prev_data, k=1)
    return nearest_dists.mean()
class Segmenter:
    """Cut an epoch range into segments of bounded accumulated drift.

    The range ``[s, e]`` is sampled every ``p`` epochs.  For each pair of
    consecutive sampled epochs the module-level ``hausdorff_d`` helper gives
    a distance between their training representations; ``segment`` then
    groups neighbouring intervals while their summed distance stays within
    ``threshold``.
    """

    def __init__(self, data_provider, threshold, range_s=None, range_e=None, range_p=None):
        """Store the provider, the threshold and the epoch range.

        Args:
            data_provider: object exposing ``train_representation(epoch)``
                and, when no explicit range is passed, the attributes
                ``s``, ``e`` and ``p``.
            threshold: accumulated distance above which a new segment starts.
            range_s: first epoch of the range.  When ``None`` the whole range
                (start, end and step) is taken from ``data_provider`` and
                ``range_e`` / ``range_p`` are ignored.
            range_e: last epoch of the range (used only with ``range_s``).
            range_p: step between sampled epochs (used only with ``range_s``).
        """
        self.data_provider = data_provider
        self.threshold = threshold
        if range_s is None:
            self.s = data_provider.s
            self.e = data_provider.e
            self.p = data_provider.p
        else:
            self.s = range_s
            self.e = range_e
            self.p = range_p

    def _interval_start_epochs(self):
        """Return the start epoch of every complete ``p``-sized interval in ``[s, e]``."""
        interval_num = (self.e - self.s) // self.p
        return [self.s + k * self.p for k in range(interval_num)]

    def _cal_interval_dists(self):
        """Compute the distance between each pair of consecutive sampled epochs.

        Returns:
            1-D float array with ``(e - s) // p`` entries; entry ``k`` is
            ``hausdorff_d`` between the representations at epochs
            ``s + k * p`` and ``s + (k + 1) * p``.
        """
        interval_num = (self.e - self.s) // self.p
        dists = np.zeros(interval_num)
        # Walk only the complete intervals.  Iterating ``range(s, e, p)``
        # yields one extra epoch whenever ``(e - s)`` is not a multiple of
        # ``p``; that requested a representation beyond ``e`` and indexed
        # past the end of ``dists``.
        for idx, curr_epoch in enumerate(self._interval_start_epochs()):
            next_data = self.data_provider.train_representation(curr_epoch + self.p)
            curr_data = self.data_provider.train_representation(curr_epoch)
            # Flatten every sample to a vector; both arrays are reshaped with
            # the sample count of the later epoch.
            sample_num = next_data.shape[0]
            next_data = next_data.reshape(sample_num, -1)
            curr_data = curr_data.reshape(sample_num, -1)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
        return dists

    def segment(self):
        """Group the intervals into segments and remember them on ``self.segments``.

        Intervals are scanned from the last to the first while their
        distances are summed.  As soon as the running sum exceeds
        ``threshold`` the intervals gathered so far are closed as a segment
        and the interval that caused the overflow opens the next one.

        Returns:
            List of ``(start_epoch, end_epoch)`` tuples in ascending order.
        """
        dists = self._cal_interval_dists()

        # Segments are found back-to-front, so collect them with O(1)
        # appends and flip once instead of inserting at the head each time.
        found = []
        count = 0
        base = len(dists) - 1
        for i in range(len(dists) - 1, -1, -1):
            count = count + dists[i]
            if count > self.threshold:
                # NOTE(review): when the very last interval alone exceeds the
                # threshold this records (len, len - 1), i.e. an empty
                # trailing segment (e, e).  Kept as-is because callers may
                # rely on it -- confirm whether it should be dropped.
                found.append((i + 1, base))
                base = i
                count = dists[i]
        found.append((0, base))
        found.reverse()

        # Translate interval indices back into epoch numbers.
        segs = [(self.s + first * self.p, self.s + (last + 1) * self.p) for first, last in found]
        self.segments = segs
        return segs

    def record_time(self, save_dir, file_name, t):
        """Write the segmentation time into ``<save_dir>/<file_name>.json``.

        Existing keys in the file are preserved; the value is stored under
        ``"segmentation"`` rounded to three decimals.

        Args:
            save_dir: directory holding the JSON file.
            file_name: file name without the ``.json`` extension.
            t: elapsed time to record.
        """
        save_file = os.path.join(save_dir, file_name + ".json")
        if os.path.exists(save_file):
            # Context manager so the handle is released even if parsing fails.
            with open(save_file, "r") as f:
                evaluation = json.load(f)
        else:
            evaluation = dict()
        evaluation["segmentation"] = round(t, 3)
        with open(save_file, 'w') as f:
            json.dump(evaluation, f)
class DenseALSegmenter(Segmenter):
    """Segmenter variant that reads per-iteration representations.

    The epoch range is fixed to start at 1 with a step of 1 and to end at
    ``epoch_num``; representations come from
    ``data_provider.train_representation_lb(iteration, epoch)``.
    NOTE(review): "AL" presumably stands for active learning and
    ``iteration`` for the active-learning round -- confirm.
    """

    def __init__(self, data_provider, threshold, epoch_num):
        """Set up a segmenter over epochs ``1 .. epoch_num`` with step 1."""
        super().__init__(data_provider, threshold, 1, epoch_num, 1)

    def _cal_interval_dists(self, iteration):
        """Return the distance between consecutive epochs of one iteration.

        Args:
            iteration: value forwarded to ``train_representation_lb``.

        Returns:
            1-D float array with ``(e - s) // p`` entries, one per pair of
            consecutive epochs, each computed by ``hausdorff_d``.
        """
        n_intervals = (self.e - self.s) // self.p
        dists = np.zeros(n_intervals)
        provider = self.data_provider
        for epoch in range(self.s, self.e, self.p):
            # Later epoch is fetched first, then the earlier one.
            later = provider.train_representation_lb(iteration, epoch + self.p)
            earlier = provider.train_representation_lb(iteration, epoch)
            slot = (epoch - self.s) // self.p
            dists[slot] = hausdorff_d(curr_data=later, prev_data=earlier)
        return dists

    def segment(self, iteration):
        """Group the epoch intervals of ``iteration`` into segments.

        Intervals are visited from last to first while summing their
        distances; once the sum exceeds ``threshold`` the collected
        intervals are closed as a segment and the overflowing interval
        starts the next one.  Unlike the parent class, the result is only
        returned and not stored on the instance.

        Args:
            iteration: value forwarded to ``_cal_interval_dists``.

        Returns:
            List of ``(start_epoch, end_epoch)`` tuples in ascending order.
        """
        dists = self._cal_interval_dists(iteration)

        # Collected newest-first, flipped afterwards.
        reversed_spans = []
        running = 0
        upper = len(dists) - 1
        for idx in reversed(range(len(dists))):
            running = running + dists[idx]
            if running > self.threshold:
                reversed_spans.append((idx + 1, upper))
                upper = idx
                running = dists[idx]
        reversed_spans.append((0, upper))

        segs = []
        for lo, hi in reversed_spans[::-1]:
            # Map interval indices to epoch numbers.
            segs.append((self.s + lo * self.p, self.s + (hi + 1) * self.p))
        return segs