#Added Retrain all clusters or only from new folder options import os import cv2 import numpy as np from sklearn.cluster import KMeans from tensorflow.keras.models import load_model from sklearn.svm import SVC from sklearn.model_selection import train_test_split from joblib import dump, load from sklearn.cluster import KMeans from keras.models import Sequential from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten import tensorflow as tf # Define desired image size img_size = (1000, 1000) def load_images_from_folder(folder): """ Load and resize images from the specified folder. :param folder: The path to the folder containing the images to load. :return: A tuple containing a list of loaded and resized images and a list of their corresponding file paths. """ images = [] image_paths = [] for filename in os.listdir(folder): file_path = os.path.join(folder, filename) if os.path.isdir(file_path): subfolder_images, subfolder_image_paths = load_images_from_folder(file_path) images.extend(subfolder_images) image_paths.extend(subfolder_image_paths) elif filename.endswith(('.png', '.jpg', '.jpeg')): img = cv2.imread(file_path, 0) img = cv2.resize(img, img_size) images.append(img) image_paths.append(file_path) return images, image_paths def train_model(folder, model_file): """ Train a model for the specified folder and save it to the specified file. :param folder: The path to the folder containing the training data. :param model_file: The path to the file where the trained model will be saved. """ # Load and resize training data images, image_paths = load_images_from_folder(folder) images = np.array(images, dtype=object) # Check if there are enough images if len(images) > 0: # Normalize pixel values images = images.astype('float32') / 255.0 # Create CNN model model = Sequential() model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1))) model.add(MaxPooling2D((2, 2))) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D((2, 2))) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(Flatten()) model.add(Dense(64, activation='relu')) model.add(Dense(1, activation='sigmoid')) # Compile CNN model using SGD optimizer from tf.keras.optimizers.legacy opt = tf.keras.optimizers.legacy.SGD() model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy']) # Convert images array to float32 images = images.astype(np.float32) # Train CNN model try: history = model.fit(images.reshape(len(images), img_size[0], img_size[1], 1), np.ones(len(images)), epochs=2, batch_size=150) # Save trained model to file print(model_file, 'here') model.save(model_file) except Exception as e: print(e) def classify_images(folder, model_folder, n_clusters=5, new_only=False): """ Classify images in the specified folder using the specified model and a k-means algorithm. :param folder: The path to the folder containing the images to classify. :param model_folder: The path to the folder containing the trained model. :param n_clusters: The number of clusters to form using the k-means algorithm. :param new_only: Whether to classify only images in a subfolder named "new". :return: A 2D list of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster. """ # Load trained model from file model_file = os.path.join(folder, os.path.basename(folder) + '.h5') model = load_model(model_file) # Load and resize images from specified folder if new_only: folder = os.path.join(folder, 'new') images, image_paths = load_images_from_folder(folder) images = np.array(images, dtype=object) # Normalize pixel values images = images.astype('float32') / 255.0 # Obtain classification scores for each image scores = model.predict(images.reshape(len(images), img_size[0], img_size[1], 1), batch_size=200) # Use k-means algorithm to cluster images based on their classification scores if len(scores) >= n_clusters: kmeans = KMeans(n_clusters=n_clusters, n_init=20) kmeans.fit(scores) # Create 2D list of image file paths, where each inner list corresponds to a cluster clusters = [[] for _ in range(n_clusters)] for i, label in enumerate(kmeans.labels_): clusters[label].append(image_paths[i]) else: clusters = [image_paths] # Return 2D list of image file paths return clusters def remove_empty_folders_recursively(directory): """ Remove and delete empty folders in the specified directory and all of its subdirectories. :param directory: The path to the directory to remove empty folders from. """ for folder in os.listdir(directory): folder_path = os.path.join(directory, folder) if os.path.isdir(folder_path): # Recursively remove empty subfolders remove_empty_folders_recursively(folder_path) # Remove folder if it is empty if not os.listdir(folder_path): os.rmdir(folder_path) def train_model_recursively(folder, model_folder, max_depth=None, depth=0): """ Train a model for the specified folder and its subdirectories and save it to the specified file. :param folder: The path to the folder containing the training data. :param model_folder: The path to the folder where the trained models will be saved. :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed. :param depth: The current depth of recursion. """ # Train model for current folder model_file = os.path.join(model_folder, os.path.basename(folder) + '.h5') train_model(folder, model_file) # Recursively train models for subdirectories if max_depth is None or depth < max_depth: for subfolder in os.listdir(folder): subfolder_path = os.path.join(folder, subfolder) if os.path.isdir(subfolder_path): model_folder = subfolder_path print(model_folder,subfolder_path) #print(subfolder_path,folder,subfolder,model_folder) train_model_recursively(subfolder_path, model_folder, max_depth, depth + 1) def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0): """ Classify images in the specified folder and its subdirectories using the specified model and a k-means algorithm. :param folder: The path to the folder containing the images to classify. :param model_folder: The path to the folder containing the trained models. :param n_clusters: The number of clusters to form using the k-means algorithm. :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed. :param depth: The current depth of recursion. :return: A dictionary where the keys are folder paths and the values are 2D lists of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster. """ # Classify images in current folder clusters = classify_images(folder, model_folder, n_clusters) result = {folder: clusters} # Recursively classify images in subdirectories if max_depth is None or depth < max_depth: for subfolder in os.listdir(folder): subfolder_path = os.path.join(folder, subfolder) if os.path.isdir(subfolder_path): result.update(classify_images_recursively(subfolder_path, model_folder, n_clusters, max_depth, depth + 1)) # Return result return result def main(): # Train models for textcv and buttoncv folders and their subdirectories train_model_recursively('textcv', 'textcv') train_model_recursively('buttoncv', 'buttoncv') # Check for and remove empty subfolders remove_empty_folders_recursively('textcv') remove_empty_folders_recursively('buttoncv') # Classify images in textcv and buttoncv folders and their subdirectories text_clusters = classify_images_recursively('textcv', 'models') button_clusters = classify_images_recursively('buttoncv', 'models') try: # Move images in textcv clusters to new folders for folder, clusters in text_clusters.items(): for i, cluster in enumerate(clusters): cluster_folder = os.path.join(folder, f'cluster_{i}') os.makedirs(cluster_folder, exist_ok=True) for image_path in cluster: new_image_path = os.path.join(cluster_folder, os.path.basename(image_path)) os.rename(image_path, new_image_path) except Exception as e: print(e) try: # Move images in buttoncv clusters to new folders for folder, clusters in button_clusters.items(): for i, cluster in enumerate(clusters): cluster_folder = os.path.join(folder, f'cluster_{i}') os.makedirs(cluster_folder, exist_ok=True) for image_path in cluster: new_image_path = os.path.join(cluster_folder, os.path.basename(image_path)) os.rename(image_path, new_image_path) except Exception as e: print(e) if __name__ == '__main__': main()