File size: 4,091 Bytes
f291f4a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import numpy as np
import json
import os
from pynndescent import NNDescent
# helper function
def hausdorff_d(curr_data, prev_data):
    """Return the mean nearest-neighbour distance from ``prev_data`` to ``curr_data``.

    An approximate nearest-neighbour index (``NNDescent``, Euclidean metric)
    is built over ``curr_data``; every row of ``prev_data`` is then queried
    for its single nearest neighbour and the resulting distances are averaged.

    Note: despite the name, this is a one-directional *mean* of
    nearest-neighbour distances, not the max-based symmetric Hausdorff
    distance.

    Args:
        curr_data: 2-D array whose rows are indexed.
        prev_data: 2-D array whose rows are used as queries.

    Returns:
        The mean of the nearest-neighbour distances.
    """
    sample_count = curr_data.shape[0]

    # Number of trees in the random projection forest: grows with the square
    # root of the sample count and is capped at 64.
    forest_size = min(64, 5 + int(round(sample_count ** 0.5 / 20.0)))

    # Maximum number of nearest-neighbour descent iterations (never below 5).
    descent_rounds = max(5, int(round(np.log2(sample_count))))

    index = NNDescent(
        curr_data,
        n_neighbors=1,
        metric="euclidean",
        n_trees=forest_size,
        n_iters=descent_rounds,
        max_candidates=10,
        verbose=False,
    )

    # query() yields (neighbour indices, neighbour distances); only the
    # distances are needed here.
    _neighbor_ids, nearest_dists = index.query(prev_data, k=1)
    return nearest_dists.mean()
class Segmenter:
    """Split a range of training epochs into consecutive segments.

    The epoch range ``[s, e]`` is cut into intervals of ``p`` epochs. For each
    interval the distance between the representations at its two end epochs
    is measured with ``hausdorff_d``; neighbouring intervals are then grouped
    into segments whose accumulated distance stays within ``threshold``.
    """

    def __init__(self, data_provider, threshold, range_s=None, range_e=None, range_p=None):
        """Store the data provider, the threshold and the epoch range.

        Args:
            data_provider: object exposing ``train_representation(epoch)``
                and, when no explicit range is given, the attributes
                ``s``, ``e`` and ``p``.
            threshold: accumulated-distance limit used by ``segment``.
            range_s: first epoch. When ``None`` the whole range (start, end
                and period) is taken from ``data_provider``.
            range_e: last epoch; only used when ``range_s`` is given.
            range_p: epoch period; only used when ``range_s`` is given.
        """
        self.data_provider = data_provider
        self.threshold = threshold
        if range_s is None:
            self.s = data_provider.s
            self.e = data_provider.e
            self.p = data_provider.p
        else:
            self.s = range_s
            self.e = range_e
            self.p = range_p

    def _cal_interval_dists(self):
        """Return one distance per interval ``[epoch, epoch + p]``.

        Returns:
            A 1-D array of length ``(e - s) // p``; entry ``k`` is the
            distance between the representations at epochs
            ``s + k * p`` and ``s + (k + 1) * p``.
        """
        interval_num = (self.e - self.s) // self.p
        dists = np.zeros(interval_num)
        # Iterate over interval indices rather than range(s, e, p): when
        # (e - s) is not a multiple of p the latter yields one extra step,
        # which would index past the end of ``dists`` and request an epoch
        # beyond ``e``.
        for idx in range(interval_num):
            curr_epoch = self.s + idx * self.p
            next_data = self.data_provider.train_representation(curr_epoch + self.p)
            curr_data = self.data_provider.train_representation(curr_epoch)
            # Flatten every sample's representation into a single row.
            next_data = next_data.reshape(next_data.shape[0], -1)
            curr_data = curr_data.reshape(curr_data.shape[0], -1)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
        return dists

    def _group_intervals(self, dists):
        """Group interval distances into ``(start_epoch, end_epoch)`` segments.

        Intervals are visited from last to first while their distances are
        accumulated. As soon as the running sum exceeds ``self.threshold``
        the intervals after the current one are closed as a segment and the
        current interval opens a new one.
        """
        groups = []
        running = 0
        base = len(dists) - 1
        for i in range(len(dists) - 1, -1, -1):
            running = running + dists[i]
            # ``i < base`` skips the very first visited interval: there is
            # nothing after it to close, and emitting (i + 1, base) there
            # would create an empty segment (e, e).
            if running > self.threshold and i < base:
                groups.append((i + 1, base))
                base = i
                running = dists[i]
        groups.append((0, base))
        # Groups were collected back to front; restore ascending order.
        groups.reverse()
        return [
            (self.s + first * self.p, self.s + (last + 1) * self.p)
            for first, last in groups
        ]

    def segment(self):
        """Compute, store and return the segmentation of the epoch range.

        Returns:
            A list of ``(start_epoch, end_epoch)`` tuples in ascending order;
            consecutive segments share their boundary epoch. The result is
            also stored in ``self.segments``.
        """
        dists = self._cal_interval_dists()
        segs = self._group_intervals(dists)
        self.segments = segs
        return segs

    def record_time(self, save_dir, file_name, t):
        """Record the segmentation time in ``<save_dir>/<file_name>.json``.

        Existing content of the JSON file is kept; only the
        ``"segmentation"`` entry is set to ``t`` rounded to 3 decimals.

        Args:
            save_dir: directory holding the JSON file.
            file_name: file name without the ``.json`` extension.
            t: elapsed time to record.
        """
        save_file = os.path.join(save_dir, file_name + ".json")
        if not os.path.exists(save_file):
            evaluation = dict()
        else:
            # Context manager so the handle is released even if parsing fails.
            with open(save_file, "r") as f:
                evaluation = json.load(f)
        evaluation["segmentation"] = round(t, 3)
        with open(save_file, 'w') as f:
            json.dump(evaluation, f)
class DenseALSegmenter(Segmenter):
    """Segmenter variant whose representations are looked up per iteration.

    The epoch range is fixed to ``1 .. epoch_num`` with a period of 1, and
    representations are fetched through
    ``data_provider.train_representation_lb(iteration, epoch)``.
    """

    def __init__(self, data_provider, threshold, epoch_num):
        """Initialise with the epoch range ``1 .. epoch_num`` and period 1.

        Args:
            data_provider: object exposing
                ``train_representation_lb(iteration, epoch)``.
            threshold: accumulated-distance limit used by ``segment``.
            epoch_num: last epoch of the range.
        """
        super().__init__(data_provider, threshold, 1, epoch_num, 1)

    def _cal_interval_dists(self, iteration):
        """Return one distance per interval ``[epoch, epoch + p]``.

        Args:
            iteration: iteration passed through to
                ``train_representation_lb``.

        Returns:
            A 1-D array of length ``(e - s) // p``; entry ``k`` is the
            distance between the representations at epochs
            ``s + k * p`` and ``s + (k + 1) * p``.
        """
        interval_num = (self.e - self.s) // self.p
        dists = np.zeros(interval_num)
        # Index-based iteration keeps the loop in step with the size of
        # ``dists`` even if the range is not a multiple of the period.
        for idx in range(interval_num):
            curr_epoch = self.s + idx * self.p
            next_data = self.data_provider.train_representation_lb(iteration, curr_epoch + self.p)
            curr_data = self.data_provider.train_representation_lb(iteration, curr_epoch)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
        return dists

    def segment(self, iteration):
        """Compute, store and return the segmentation for ``iteration``.

        Intervals are visited from last to first while their distances are
        accumulated. As soon as the running sum exceeds ``self.threshold``
        the intervals after the current one are closed as a segment and the
        current interval opens a new one.

        Args:
            iteration: iteration whose representations are segmented.

        Returns:
            A list of ``(start_epoch, end_epoch)`` tuples in ascending order;
            consecutive segments share their boundary epoch. The result is
            also stored in ``self.segments``.
        """
        dists = self._cal_interval_dists(iteration)
        groups = []
        running = 0
        base = len(dists) - 1
        for i in range(len(dists) - 1, -1, -1):
            running = running + dists[i]
            # ``i < base`` skips the very first visited interval: there is
            # nothing after it to close, and emitting (i + 1, base) there
            # would create an empty segment (e, e).
            if running > self.threshold and i < base:
                groups.append((i + 1, base))
                base = i
                running = dists[i]
        groups.append((0, base))
        # Groups were collected back to front; restore ascending order.
        groups.reverse()
        segs = [
            (self.s + first * self.p, self.s + (last + 1) * self.p)
            for first, last in groups
        ]
        self.segments = segs
        return segs
|