import torch
import numpy as np

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import euclidean_distances
|
class SkeletonGenerator:
    """Generate a skeleton of proxy points that spans the high-dimensional
    representation space as a set of concentric shells around the data center."""

    def __init__(self, data_provider, epoch, interval=25, base_num_samples=10):
        """
        interval: int : number of radius layers (shells) between the center and the boundary
        base_num_samples: int : base count used to scale the number of samples per shell
        """
        self.data_provider = data_provider
        self.epoch = epoch
        self.interval = interval
        self.base_num_samples = base_num_samples
|
    def skeleton_gen(self):
        """Sample proxy points on concentric shells around the mean of the training representation."""
        # fix all random seeds for reproducibility
        torch.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        np.random.seed(0)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        train_data = self.data_provider.train_representation(epoch=self.epoch)
        train_data = torch.Tensor(train_data)
        center = train_data.mean(dim=0)

        # radius of the smallest sphere around `center` that encloses all training points
        radius = ((train_data - center) ** 2).sum(dim=1).max().sqrt()

        radii = self.create_decreasing_array(1e-3, radius.item(), self.interval)
        epsilon = 1e-2
        train_data_distances = ((train_data - center) ** 2).sum(dim=1).sqrt().cpu().detach().numpy()

        # scale the number of proxies per shell with the number of training points near that shell
        num_samples_per_radius_l = []
        for r in radii:
            close_points_indices = np.where(np.abs(train_data_distances - r) < epsilon)[0]
            close_points = train_data[close_points_indices].cpu().detach().numpy()
            print("radius {:.4f}: {} nearby training points".format(r, len(close_points)))

            base_num_samples = len(close_points) + 1
            num_samples = int(base_num_samples * r // 4)
            num_samples_per_radius_l.append(num_samples)
|
print("num_samples_per_radius_l",radii) |
|
print("num_samples_per_radssius_l",num_samples_per_radius_l) |
|
|
|
high_bom_samples = [] |
|
|
|
for i in range(len(radii)): |
|
r = radii[i] |
|
|
|
num_samples_per_radius = num_samples_per_radius_l[i] |
|
|
|
samples = torch.randn(num_samples_per_radius, 512) |
|
samples = samples / samples.norm(dim=1, keepdim=True) * r |
|
|
|
high_bom_samples.append(samples) |
|
|
|
|
|
high_bom = torch.cat(high_bom_samples, dim=0) |
|
|
|
high_bom = high_bom.cpu().detach().numpy() |
|
|
|
print("shape", high_bom.shape) |
|
|
|
|
|
|
|
|
|
|
|
epsilon = 1e-3 |
|
for r in radii: |
|
close_points_indices = np.where(np.abs(train_data_distances - r) < epsilon)[0] |
|
close_points = train_data[close_points_indices].cpu().detach().numpy() |
|
high_bom = np.concatenate((high_bom, close_points), axis=0) |
|
|
|
|
|
return high_bom |
|
    def skeleton_gen_union(self):
        """Sample proxy points on a small fixed set of shells around the KMeans center
        of the training representation."""
        # fix all random seeds for reproducibility
        torch.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        np.random.seed(0)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        train_data = self.data_provider.train_representation(epoch=self.epoch)

        # a single KMeans cluster locates the data center
        kmeans = KMeans(n_clusters=1)
        kmeans.fit(train_data)
        center = torch.Tensor(kmeans.cluster_centers_[0])

        train_data = torch.Tensor(train_data)
        radius = ((train_data - center) ** 2).sum(dim=1).max().sqrt()
        print("radius", radius)

        # three fixed shells: a quarter of the enclosing radius, a tenth of it, and one near the center
        radius = radius.item()
        radii = [radius / 4, radius / 10, 1e-3]
        samples_per_radius = 200
        num_samples_per_radius_l = [samples_per_radius] * len(radii)
        print("radii", radii)
        print("num_samples_per_radius_l", num_samples_per_radius_l)
|
        high_bom_samples = []
        dim = train_data.shape[1]  # sample in the representation's own dimensionality
        for i in range(len(radii)):
            r = radii[i]
            num_samples_per_radius = num_samples_per_radius_l[i]

            # draw Gaussian vectors and project them onto the sphere of radius r
            samples = torch.randn(num_samples_per_radius, dim)
            samples = samples / samples.norm(dim=1, keepdim=True) * r
            high_bom_samples.append(samples)

        high_bom = torch.cat(high_bom_samples, dim=0)
        high_bom = high_bom.cpu().detach().numpy()
        print("skeleton shape", high_bom.shape)

        train_data_distances = ((train_data - center) ** 2).sum(dim=1).sqrt().cpu().detach().numpy()

        # additionally keep the real training points that lie close to each shell
        epsilon = 1e-2
        for r in radii:
            close_points_indices = np.where(np.abs(train_data_distances - r) < epsilon)[0]
            close_points = train_data[close_points_indices].cpu().detach().numpy()
            high_bom = np.concatenate((high_bom, close_points), axis=0)

        return high_bom
|
    def skeleton_gen_use_perturb(self, _epsilon=1e-2, _per=0.7):
        """
        Find the training points closest to each radius shell, then generate
        new proxies by perturbing those points outward along the direction
        from the center.
        """
        # fix all random seeds for reproducibility
        torch.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        np.random.seed(0)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        epsilon = _epsilon

        train_data = self.data_provider.train_representation(epoch=self.epoch)
        train_data = torch.Tensor(train_data)
        center = train_data.mean(dim=0)

        max_radius = ((train_data - center) ** 2).sum(dim=1).max().sqrt().item()
        min_radius = max_radius * _per

        # heuristic: roughly 12.8 shells per unit of radius
        interval = int(max_radius * 12.8)
        print("max_radius", max_radius, "interval", interval)

        radii = np.linspace(max_radius, min_radius, interval)

        high_bom_samples = []
        train_data_distances = ((train_data - center) ** 2).sum(dim=1).sqrt().cpu().detach().numpy()

        for r in radii:
            close_points_indices = np.where(np.abs(train_data_distances - r) < epsilon)[0]
            close_points = train_data[close_points_indices]

            # unit vectors pointing from the center toward each close point
            direction_from_center = (close_points - center) / torch.norm(close_points - center, dim=1, keepdim=True)

            # push each point slightly outward along that direction
            noise = direction_from_center * epsilon
            proxies = (close_points + noise).cpu().detach().numpy()

            high_bom_samples.append(proxies)

        high_bom = np.concatenate(high_bom_samples, axis=0)
        return high_bom
|
    def gen_skeleton_by_center(self):
        """Return the single KMeans center of the training representation."""
        train_data = self.data_provider.train_representation(self.epoch)
        kmeans = KMeans(n_clusters=1)
        kmeans.fit(train_data)
        centers = kmeans.cluster_centers_
        return centers
|
    def create_decreasing_array(self, min_val, max_val, levels, factor=0.8):
        """Return `levels` values that start at `min_val` and approach `max_val`
        with geometrically shrinking gaps (controlled by `factor`)."""
        range_val = max_val - min_val

        # geometric weights 1, factor, factor^2, ..., normalized so the largest is 1
        level_indices = np.arange(levels)
        scaled_levels = factor ** level_indices
        scaled_values = scaled_levels * range_val / np.max(scaled_levels)

        # subtracting from max_val maps the first value to min_val and later values toward max_val
        final_values = max_val - scaled_values
        return final_values
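
# Usage sketch for SkeletonGenerator (hypothetical `provider`: any object
# exposing train_representation(epoch) -> (N, D) array works):
#
#   gen = SkeletonGenerator(provider, epoch=100, interval=25)
#   skeleton = gen.skeleton_gen()             # shell samples plus nearby real points
#   proxies = gen.skeleton_gen_use_perturb()  # perturbed near-shell training points
#
# A runnable end-to-end example on synthetic data is given at the bottom of
# this file.

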
class CenterSkeletonGenerator:
    """Skeleton generator that recursively clusters the training representation
    and uses the cluster centers (with their radii) as the skeleton."""

    def __init__(self, data_provider, epoch, threshold=0.5, min_cluster=500):
        """
        threshold: float : Dunn-index threshold below which a cluster is split further
        min_cluster: int : clusters smaller than this are never split
        """
        self.data_provider = data_provider
        self.epoch = epoch
        self.threshold = threshold
        self.min_cluster = min_cluster
|
    def gen_center(self, data, k=2):
        """Run KMeans with k clusters and return (centers, labels, radii), where each
        radius is the max distance from a cluster's center to its members."""
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(data)
        centers = kmeans.cluster_centers_
        labels = kmeans.labels_
        radii = []
        for i in range(k):
            cluster_data = data[labels == i]
            if len(cluster_data) > 0:
                distances = np.sqrt(((cluster_data - centers[i]) ** 2).sum(axis=1))
                radii.append(np.max(distances))
            else:
                radii.append(0)

        return centers, labels, radii
|
    def if_need_split(self, data):
        """Decide whether a cluster should be split: small clusters are kept whole;
        otherwise split when a trial 2-way KMeans yields a Dunn index below the threshold."""
        if len(data) < self.min_cluster:
            return False

        kmeans = KMeans(n_clusters=2)
        kmeans.fit(data)
        labels = kmeans.labels_

        dunn_index = self.dunns_index(data, labels)
        print("dunn index", dunn_index)
        return dunn_index < self.threshold
|
    def dunns_index(self, X, labels):
        """Dunn index: min inter-cluster distance / max intra-cluster diameter.
        Higher values indicate better-separated, more compact clusters."""
        distance_matrix = euclidean_distances(X)

        inter_cluster_distances = []
        intra_cluster_distances = []

        unique_labels = np.unique(labels)

        if len(unique_labels) < 2:
            return float('inf')

        # intra-cluster: the diameter (max pairwise distance) of each cluster
        for label in unique_labels:
            members = np.where(labels == label)[0]
            if len(members) <= 1:
                continue
            pairwise_distances = distance_matrix[np.ix_(members, members)]
            intra_cluster_distances.append(np.max(pairwise_distances))

        if not intra_cluster_distances:
            return float('inf')

        max_intra_cluster_distance = max(intra_cluster_distances)

        # inter-cluster: the minimum distance between members of each pair of clusters
        for i in range(len(unique_labels)):
            for j in range(i + 1, len(unique_labels)):
                members_i = np.where(labels == unique_labels[i])[0]
                members_j = np.where(labels == unique_labels[j])[0]
                pairwise_distances = distance_matrix[np.ix_(members_i, members_j)]
                inter_cluster_distances.append(np.min(pairwise_distances))

        if not inter_cluster_distances:
            return float('inf')

        return min(inter_cluster_distances) / max_intra_cluster_distance
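
    # Worked example (illustrative numbers): if the closest members of two
    # clusters are 4.0 apart (min inter-cluster distance) and the widest
    # cluster has diameter 2.0 (max intra-cluster distance), the Dunn index
    # is 4.0 / 2.0 = 2.0. Larger values mean better-separated clusters, so
    # if_need_split only splits when the index falls below `threshold`.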
|
    def recursive_clustering(self, data, k=2):
        """Cluster `data` into k groups, then recursively split any group that still
        looks heterogeneous; return all centers and radii found along the way."""
        centers, labels, radii = self.gen_center(data, k=k)
        all_centers = list(centers)
        all_radii = list(radii)

        for label in set(labels):
            cluster = data[labels == label]
            if len(cluster):
                if self.if_need_split(cluster):
                    sub_centers, sub_radii = self.recursive_clustering(cluster, k=2)
                    all_centers.extend(sub_centers)
                    all_radii.extend(sub_radii)

        return all_centers, all_radii
|
    def center_skeleton_generation(self):
        """Build the skeleton: the global center, a coarse 10-way clustering, and the
        centers found by recursively splitting each coarse cluster."""
        data = self.data_provider.train_representation(self.epoch)
        centers_c, _, radii_c = self.gen_center(data, k=1)
        centers_n, labels, radii_n = self.gen_center(data, k=10)
        print("finished init")

        all_centers = []
        all_radii = []
        for label in np.unique(labels):
            cluster = data[labels == label]
            if len(cluster):
                sub_centers, sub_radii = self.recursive_clustering(cluster, k=2)
                all_centers.extend(sub_centers)
                all_radii.extend(sub_radii)

        all_centers = np.array(all_centers)
        all_radii = np.array(all_radii)
        return (np.concatenate((centers_c, centers_n, all_centers), axis=0),
                np.concatenate((radii_c, radii_n, all_radii), axis=0))
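

if __name__ == "__main__":
    # Runnable end-to-end sketch on synthetic blobs. `BlobProvider` is a
    # hypothetical stand-in for the real data provider; the generators only
    # need an object exposing train_representation(epoch) -> (N, D) array.
    class BlobProvider:
        def __init__(self, data):
            self.data = data

        def train_representation(self, epoch):
            return self.data

    X, _ = make_blobs(n_samples=2000, centers=4, n_features=16, random_state=0)
    provider = BlobProvider(X)

    # shell-based skeleton around the data mean
    skeleton = SkeletonGenerator(provider, epoch=0, interval=25).skeleton_gen()
    print("SkeletonGenerator skeleton:", skeleton.shape)

    # recursive-clustering skeleton of centers and radii
    centers, radii = CenterSkeletonGenerator(provider, epoch=0).center_skeleton_generation()
    print("CenterSkeletonGenerator centers:", centers.shape, "radii:", radii.shape)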
|