```python
import gradio as gr
from transformers import CLIPProcessor, CLIPModel
import torch
import numpy as np
from PIL import Image
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity

# Load model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Load dataset sample
dataset = load_dataset("Dataseeds/DataSeeds.AI-Sample-Dataset-DSD")["train"]
sample = dataset.shuffle(seed=42).select(range(1500))

# Load precomputed image embeddings
image_embeddings = np.load("image_embeddings.npy")
image_paths = sample["image_id"]

# Function to get a normalized text embedding
def get_text_embedding(text):
    inputs = processor(text=[text], return_tensors="pt")
    with torch.no_grad():
        embedding = model.get_text_features(**inputs)
    embedding = embedding / embedding.norm(dim=-1, keepdim=True)
    return embedding.cpu().numpy()

# Function to find the best-matching image for a text query
def find_best_match(text):
    text_embedding = get_text_embedding(text)
    similarities = cosine_similarity(text_embedding, image_embeddings)
    best_index = np.argmax(similarities)
    best_image = sample[int(best_index)]["image"]
    similarity_score = similarities[0, best_index]
    return best_image, f"Image ID: {image_paths[best_index]}, Similarity Score: {similarity_score:.4f}"

# Gradio interface
iface = gr.Interface(
    fn=find_best_match,
    inputs=gr.Textbox(lines=1, placeholder="Enter description here..."),
    outputs=[gr.Image(type="pil"), gr.Textbox()],
    title="Image Retrieval by Text Description",
    description="Enter a description and see the most similar image from the dataset.",
)

iface.launch()
```
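The app assumes `image_embeddings.npy` already exists on disk; the script that produces it is not shown here. Below is a minimal sketch of one way to precompute those embeddings with the same CLIP checkpoint. The batching scheme and batch size are assumptions, not taken from the original app, but the shuffle seed and sample size must match the app's sampling so that row indices line up.

```python
# Sketch only: precompute image_embeddings.npy for the retrieval app above.
# Batch size and loop structure are illustrative assumptions.
import numpy as np
import torch
from datasets import load_dataset
from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

dataset = load_dataset("Dataseeds/DataSeeds.AI-Sample-Dataset-DSD")["train"]
# Must match the app: same seed and same sample size, so index i in the
# embedding matrix corresponds to sample[i] in the app.
sample = dataset.shuffle(seed=42).select(range(1500))

embeddings = []
batch_size = 32
for start in range(0, len(sample), batch_size):
    batch_images = sample[start:start + batch_size]["image"]
    inputs = processor(images=batch_images, return_tensors="pt")
    with torch.no_grad():
        features = model.get_image_features(**inputs)
    # L2-normalize so cosine similarity against the normalized text
    # embedding reduces to a dot product.
    features = features / features.norm(dim=-1, keepdim=True)
    embeddings.append(features.cpu().numpy())

np.save("image_embeddings.npy", np.concatenate(embeddings, axis=0))
```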