File size: 4,091 Bytes
f291f4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
import json
import os
from pynndescent import NNDescent

# helper function
def hausdorff_d(curr_data, prev_data):
    """Return the mean nearest-neighbour distance from prev_data to curr_data.

    An NNDescent index is built over ``curr_data``; every row of
    ``prev_data`` is then queried for its single nearest neighbour in that
    index and the resulting Euclidean distances are averaged. Despite the
    name, the value is a one-directional mean rather than a max-of-min
    Hausdorff distance.

    Args:
        curr_data: 2-D array the index is built on (its row count drives the
            index parameters below).
        prev_data: array of query points passed to ``NNDescent.query``.

    Returns:
        The mean of the nearest-neighbour distances reported by the query.
    """
    num_points = curr_data.shape[0]
    # size of the random projection forest: grows with sqrt(n), capped at 64
    forest_size = min(64, 5 + int(round(num_points ** 0.5 / 20.0)))
    # upper bound on nearest-neighbour descent iterations: at least 5
    descent_iters = max(5, int(round(np.log2(num_points))))

    # NOTE(review): the index graph is built with n_neighbors=1; pynndescent
    # documents larger values as giving more accurate searches - confirm the
    # accuracy is acceptable for this use.
    index = NNDescent(
        curr_data,
        n_neighbors=1,
        metric="euclidean",
        n_trees=forest_size,
        n_iters=descent_iters,
        max_candidates=10,
        verbose=False
    )
    # query returns (indices, distances); only the distances are needed
    _, nearest_dists = index.query(prev_data, k=1)
    return nearest_dists.mean()

class Segmenter:
    """Split an epoch range into segments whose accumulated drift is bounded.

    The epoch range ``[s, e]`` is cut into intervals of ``p`` epochs. For each
    interval the distance between the training representations at its two
    end epochs is measured with ``hausdorff_d``; consecutive intervals are
    then grouped, walking from the last interval to the first, and a new
    segment is started whenever the running sum of distances exceeds
    ``threshold``.

    Attributes set by the constructor:
        data_provider: object supplying ``train_representation(epoch)`` and,
            when no explicit range is given, ``s``, ``e`` and ``p``.
        threshold: limit on the accumulated distance inside one segment.
        s, e, p: start epoch, end epoch and epoch period.

    ``segment()`` additionally stores its result in ``self.segments``.
    """

    def __init__(self, data_provider, threshold, range_s=None, range_e=None, range_p=None):
        """Store the provider, the threshold and the epoch range.

        Args:
            data_provider: source of representations (and of the default range).
            threshold: accumulated-distance limit used by ``segment``.
            range_s: start epoch; when None the whole range (s, e, p) is
                taken from ``data_provider``.
            range_e: end epoch; when None (and ``range_s`` is given) falls
                back to ``data_provider.e``.
            range_p: epoch period; when None (and ``range_s`` is given) falls
                back to ``data_provider.p``.
        """
        self.data_provider = data_provider
        self.threshold = threshold
        if range_s is None:
            self.s = data_provider.s
            self.e = data_provider.e
            self.p = data_provider.p
        else:
            self.s = range_s
            # A partially specified range used to leave e/p as None, which
            # only failed later inside the interval arithmetic; fill the
            # missing parts from the provider instead.
            self.e = data_provider.e if range_e is None else range_e
            self.p = data_provider.p if range_p is None else range_p

    def _load_flat(self, epoch):
        """Load the training representation of ``epoch`` as a 2-D array."""
        data = self.data_provider.train_representation(epoch)
        # keep the leading (sample) axis, flatten everything else
        return data.reshape(data.shape[0], -1)

    def _cal_interval_dists(self):
        """Return the distance measured across every interval of the range.

        Returns:
            1-D float array of length ``(e - s) // p``; entry ``k`` is
            ``hausdorff_d`` between the representations at epochs
            ``s + k*p`` and ``s + (k+1)*p``.
        """
        interval_num = (self.e - self.s) // self.p

        dists = np.zeros(interval_num)
        if interval_num == 0:
            return dists

        # Iterate over exactly interval_num intervals: range(s, e, p) yields
        # one step too many when (e - s) is not a multiple of p, which made
        # the index below run past the end of dists.
        curr_data = self._load_flat(self.s)
        for idx in range(interval_num):
            next_data = self._load_flat(self.s + (idx + 1) * self.p)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
            # the end of this interval is the start of the next one, so each
            # representation is loaded only once
            curr_data = next_data

        return dists

    def segment(self):
        """Group the intervals into segments and return their epoch bounds.

        Returns:
            List of ``(start_epoch, end_epoch)`` tuples ordered from the
            earliest segment to the latest. The same list is stored in
            ``self.segments``.
        """
        dists = self._cal_interval_dists()
        bounds = []
        running = 0
        base = len(dists) - 1
        # Walk backwards so that the latest epochs are grouped first.
        # NOTE(review): if the last interval alone exceeds the threshold the
        # first cut is (len(dists), len(dists) - 1), i.e. a zero-length
        # trailing segment (e, e). Kept as-is because callers may rely on it.
        for i in range(len(dists) - 1, -1, -1):
            running = running + dists[i]
            if running > self.threshold:
                bounds.append((i + 1, base))
                base = i
                # interval i opens the next (earlier) segment
                running = dists[i]
        bounds.append((0, base))
        # cuts were collected latest-first; report them earliest-first
        bounds.reverse()

        segs = [(self.s + i * self.p, self.s + (j + 1) * self.p) for i, j in bounds]
        self.segments = segs
        return segs

    def record_time(self, save_dir, file_name, t):
        """Write the segmentation time into ``<save_dir>/<file_name>.json``.

        An existing JSON file is loaded and updated so its other keys are
        kept; otherwise a new file is created.

        Args:
            save_dir: directory holding the JSON file.
            file_name: file name without the ``.json`` extension.
            t: elapsed time; stored rounded to 3 decimals under
                the key ``"segmentation"``.
        """
        save_file = os.path.join(save_dir, file_name + ".json")
        if os.path.exists(save_file):
            # context manager closes the handle even if the JSON is invalid
            with open(save_file, "r") as f:
                evaluation = json.load(f)
        else:
            evaluation = dict()
        evaluation["segmentation"] = round(t, 3)
        with open(save_file, 'w') as f:
            json.dump(evaluation, f)


class DenseALSegmenter(Segmenter):
    """Segmenter variant that works per iteration on ``train_representation_lb``.

    The epoch range is fixed to start at 1 and end at ``epoch_num`` with a
    period of 1. Unlike the base class, ``segment`` and
    ``_cal_interval_dists`` take an ``iteration`` argument that is forwarded
    to ``data_provider.train_representation_lb(iteration, epoch)``.
    """

    def __init__(self, data_provider, threshold, epoch_num):
        """Initialise with the range s=1, e=epoch_num, p=1.

        Args:
            data_provider: object supplying ``train_representation_lb``.
            threshold: accumulated-distance limit used by ``segment``.
            epoch_num: last epoch of the range.
        """
        super().__init__(data_provider, threshold, 1, epoch_num, 1)

    def _cal_interval_dists(self, iteration):
        """Return the distance measured across every interval for ``iteration``.

        Args:
            iteration: value forwarded as the first argument of
                ``train_representation_lb``.

        Returns:
            1-D float array of length ``(e - s) // p``; entry ``k`` is
            ``hausdorff_d`` between the representations at epochs
            ``s + k*p`` and ``s + (k+1)*p``.
        """
        interval_num = (self.e - self.s) // self.p

        dists = np.zeros(interval_num)
        if interval_num == 0:
            return dists

        load = self.data_provider.train_representation_lb
        curr_data = load(iteration, self.s)
        # Driving the loop by interval_num keeps the index inside dists.
        for idx in range(interval_num):
            next_data = load(iteration, self.s + (idx + 1) * self.p)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
            # the end of this interval is the start of the next one, so each
            # representation is loaded only once
            curr_data = next_data

        return dists

    def segment(self, iteration):
        """Group the intervals of ``iteration`` into segments.

        Args:
            iteration: value forwarded to ``_cal_interval_dists``.

        Returns:
            List of ``(start_epoch, end_epoch)`` tuples ordered from the
            earliest segment to the latest. The list is also stored in
            ``self.segments``, as ``Segmenter.segment`` does.
        """
        dists = self._cal_interval_dists(iteration)
        bounds = []
        running = 0
        base = len(dists) - 1
        # Walk backwards so that the latest epochs are grouped first.
        # NOTE(review): if the last interval alone exceeds the threshold the
        # first cut is (len(dists), len(dists) - 1), i.e. a zero-length
        # trailing segment. Kept as-is because callers may rely on it.
        for i in range(len(dists) - 1, -1, -1):
            running = running + dists[i]
            if running > self.threshold:
                bounds.append((i + 1, base))
                base = i
                # interval i opens the next (earlier) segment
                running = dists[i]
        bounds.append((0, base))
        # cuts were collected latest-first; report them earliest-first
        bounds.reverse()

        segs = [(self.s + i * self.p, self.s + (j + 1) * self.p) for i, j in bounds]
        self.segments = segs
        return segs