Spaces:
Sleeping
Sleeping
File size: 2,042 Bytes
588bc8d c7a4d5b 588bc8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics.pairwise import cosine_similarity
import os
# Shared feature extractor: ResNet50 initialised with ImageNet weights.
# include_top=False drops the classification head and pooling='avg'
# average-pools the final feature maps, so the network is used to produce
# embeddings rather than class scores.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)
def extract_features(img_path, model):
    """Return the flattened feature vector ``model`` predicts for one image.

    The image at ``img_path`` is loaded at 224x224, converted to an array,
    given a leading batch axis, passed through the ResNet50
    ``preprocess_input`` step and finally through ``model.predict``.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; it is called with the
            preprocessed single-image batch.

    Returns:
        The prediction flattened to one dimension.
    """
    pixels = image.img_to_array(
        image.load_img(img_path, target_size=(224, 224))
    )
    # Add a leading batch axis of size 1 before preprocessing and prediction.
    batch = preprocess_input(pixels[np.newaxis, ...])
    return model.predict(batch).flatten()
# Directory containing images
image_dir = './forward_facing'

# Extract features for every readable image in the directory.
# Entries are visited in sorted order: os.listdir() returns names in
# arbitrary order, and the duplicate pass later in this script flags the
# later file of each similar pair, so an unsorted listing would make the
# result differ between runs and machines.
image_features = {}
for img_file in sorted(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, img_file)
    if not os.path.isfile(img_path):
        # Sub-directories cannot be loaded as images.
        continue
    try:
        features = extract_features(img_path, model)
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError, so stray non-image
        # or corrupt files are reported and skipped instead of aborting the
        # whole run.
        print(f"Skipping unreadable file {img_file!r}: {exc}")
        continue
    image_features[img_file] = features
# Convert feature dictionary to parallel lists for processing
feature_list = list(image_features.values())
file_list = list(image_features.keys())

# Calculate similarities: entry [i][j] is the cosine similarity between the
# feature vectors of file_list[i] and file_list[j].
num_images = len(file_list)
if num_images:
    # A single vectorised call computes every pair at once, instead of one
    # cosine_similarity() call per pair from a Python double loop.
    similarity_matrix = cosine_similarity(
        np.asarray(feature_list, dtype=np.float64)
    )
    # An image is not compared with itself; keep the diagonal at 0.
    np.fill_diagonal(similarity_matrix, 0.0)
else:
    # cosine_similarity() rejects an input with no rows, so an empty
    # directory gets an empty matrix directly.
    similarity_matrix = np.zeros((0, 0))
# Identify duplicates: an image is flagged when its similarity to any image
# listed before it exceeds the threshold, i.e. only the later image of a
# similar pair is flagged.
threshold = 0.9  # Similarity threshold for duplicates
duplicates = {
    file_list[col]
    for col in range(1, num_images)
    # Rows [0, col) of column `col` hold the scores against earlier images.
    if np.any(similarity_matrix[:col, col] > threshold)
}

# NOTE(review): an image that has already been flagged is still used as a
# reference for later images — confirm that this chaining is intended.

# Deletion is currently disabled (commented out); to remove the files:
# for duplicate in duplicates:
#     os.remove(os.path.join(image_dir, duplicate))
print("Duplicate Images No => ", len(duplicates))
# print(f"Removed {len(duplicates)} duplicate images.")
|