import torch import matplotlib.pyplot as plt from sklearn.manifold import TSNE import seaborn as sns import umap import os from post_train import collect_features, prepare_data_and_model def visualize_features(features, labels, method='tsne', save_path=None, include_unknown=False): """ 可视化特征分布 Args: features: torch.Tensor, 特征向量 labels: torch.Tensor, 标签 method: str, 'tsne' 或 'umap' save_path: str, 保存路径,如果为None则显示图像 include_unknown: bool, 是否包含未知类(第21类) """ # 转换为numpy数组 features = features.cpu().numpy() labels = labels.cpu().numpy() # 降维 print(f"Performing {method.upper()} dimensionality reduction...") if method.lower() == 'tsne': reducer = TSNE(n_components=2, random_state=42) embedded = reducer.fit_transform(features) else: # umap reducer = umap.UMAP(n_components=2, random_state=42) embedded = reducer.fit_transform(features) # 清理之前的图像状态并创建新图形 plt.close('all') # 关闭所有图形 fig = plt.figure(figsize=(15, 10)) # 定义标记样式和颜色 markers = ['o', 's', '^', 'D'] # 圆形、方形、三角形、菱形 colors = ['#FF4B4B', '#4B4BFF', '#4BFF4B', '#FFB74B', '#B74BFF'] # 红、蓝、绿、橙、紫 # 确定要绘制的类别数量 num_classes = 21 if include_unknown else 20 # 为每个类别分配标记和颜色 for i in range(num_classes): marker_idx = i % len(markers) color_idx = i % len(colors) mask = labels == i if i == 20: # 未知类使用特殊标记 plt.scatter( embedded[mask, 0], embedded[mask, 1], c='gray', # 使用灰色 marker='*', # 使用星形 s=150, # 稍微大一点 alpha=0.6, label='Unknown', edgecolors='white', linewidth=0.5 ) else: plt.scatter( embedded[mask, 0], embedded[mask, 1], c=colors[color_idx], marker=markers[marker_idx], s=100, alpha=0.6, label=f'Class {i}', edgecolors='white', linewidth=0.5 ) plt.grid(True, linestyle='--', alpha=0.7) plt.title(f'Feature Distribution ({method.upper()})', fontsize=14, pad=20) # 调整图例 plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, ncol=1, # 使用单列显示图例 fontsize=10) # 调整布局 plt.tight_layout() # 先保存再显示 if save_path: print(f"Saving plot to {save_path}") os.makedirs(os.path.dirname(save_path), exist_ok=True) plt.savefig(save_path, bbox_inches='tight', dpi=300, pad_inches=0.5) plt.show() plt.close() if __name__ == "__main__": # 设置设备 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"Using device: {device}") model, train_loader, val_loader, device = prepare_data_and_model(model_path='models/resnet50_99.92.pth', model_type='resnet50', batch_size=128) ## 加载特征 print("Collecting features of training set...") features, labels = collect_features( model=model, loader=train_loader, device=device ) print("Collecting features of validation set...") val_features, val_labels = collect_features( model=model, loader=val_loader, device=device ) # 可视化特征 # print("Visualizing features using t-SNE...") # visualize_features( # features=features, # labels=labels, # method='tsne', # save_path='outputs/tsne_features.png' # ) print("Visualizing features using UMAP...") visualize_features( features=features, labels=labels, method='umap', save_path='outputs/resnet50_train_FeatureMap.png', include_unknown=False ) visualize_features( features=val_features, labels=val_labels, method='umap', save_path='outputs/resnet50_val_FeatureMap.png', include_unknown=True )