Spaces:
Runtime error
Runtime error
salihmarangoz
committed on
Commit
•
d7b8e7c
1
Parent(s):
77666dd
init
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- .gitignore +1 -0
- .vscode/settings.json +3 -0
- README.md +1 -1
- app.py +125 -0
- image_filenames.txt +0 -0
- images/0_430b273cb7915d83.jpg +3 -0
- images/0_618577ba46bc3d04.jpg +3 -0
- images/0_8684223b404f7e47.jpg +3 -0
- images/0_c8f198d7da690cc8.jpg +3 -0
- images/0_e7ec496a16bfed05.jpg +3 -0
- images/0_e8b9b9a19ebf6a0d.jpg +3 -0
- images/0_ff075584fa3ac3fe.jpg +3 -0
- images/1000_nm1265067_rm1814797056_1975-7-6_2006.jpg +3 -0
- images/1000_nm1265067_rm2092538624_1975-7-6_2006.jpg +3 -0
- images/1000_nm1265067_rm2210106880_1975-7-6_2009.jpg +3 -0
- images/1000_nm1265067_rm2516756224_1975-7-6_2013.jpg +3 -0
- images/1000_nm1265067_rm3130391296_1975-7-6_2014.jpg +3 -0
- images/1000_nm1265067_rm3375870464_1975-7-6_2011.jpg +3 -0
- images/1000_nm1265067_rm518437632_1975-7-6_2011.jpg +3 -0
- images/1000_nm1265067_rm964988416_1975-7-6_2009.jpg +3 -0
- images/1001_nm0118568_rm1345886720_1978-2-9_2006.jpg +3 -0
- images/1001_nm0118568_rm1602599168_1978-2-9_2010.jpg +3 -0
- images/1001_nm0118568_rm2938931456_1978-2-9_2004.jpg +3 -0
- images/1001_nm0118568_rm3014951936_1978-2-9_2010.jpg +3 -0
- images/1001_nm0118568_rm3094448384_1978-2-9_2009.jpg +3 -0
- images/1001_nm0118568_rm3115615232_1978-2-9_2010.jpg +3 -0
- images/1001_nm0118568_rm323651840_1978-2-9_2009.jpg +3 -0
- images/1001_nm0118568_rm3554462208_1978-2-9_2005.jpg +3 -0
- images/1001_nm0118568_rm4114192896_1978-2-9_2000.jpg +3 -0
- images/1001_nm0118568_rm908588288_1978-2-9_2014.jpg +3 -0
- images/1002_nm1404488_rm1000782336_1991-4-10_2011.jpg +3 -0
- images/1002_nm1404488_rm1784853248_1991-4-10_2011.jpg +3 -0
- images/1002_nm1404488_rm1801630464_1991-4-10_2011.jpg +3 -0
- images/1002_nm1404488_rm1820361728_1991-4-10_2010.jpg +3 -0
- images/1002_nm1404488_rm1835184896_1991-4-10_2011.jpg +3 -0
- images/1002_nm1404488_rm2423361024_1991-4-10_2009.jpg +3 -0
- images/1002_nm1404488_rm2595915264_1991-4-10_2010.jpg +3 -0
- images/1002_nm1404488_rm3253067264_1991-4-10_2013.jpg +3 -0
- images/1002_nm1404488_rm380025344_1991-4-10_2011.jpg +3 -0
- images/1003_nm0004395_rm1588953600_1973-4-3_2009.jpg +3 -0
- images/1003_nm0004395_rm202870272_1973-4-3_2010.jpg +3 -0
- images/1003_nm0004395_rm2100920576_1973-4-3_2009.jpg +3 -0
- images/1003_nm0004395_rm2391973888_1973-4-3_2012.jpg +3 -0
- images/1003_nm0004395_rm3014891520_1973-4-3_2002.jpg +3 -0
- images/1003_nm0004395_rm3628986880_1973-4-3_2015.jpg +3 -0
- images/1003_nm0004395_rm504860160_1973-4-3_2010.jpg +3 -0
- images/1004_nm0788202_rm1421581056_1964-11-27_2007.jpg +3 -0
- images/1004_nm0788202_rm1603177472_1964-11-27_2010.jpg +3 -0
- images/1004_nm0788202_rm1629587200_1964-11-27_2010.jpg +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.faiss filter=lfs diff=lfs merge=lfs -text
|
37 |
+
images/** filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
venv
|
.vscode/settings.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"ros.distro": "noetic"
|
3 |
+
}
|
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
license: gpl-3.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
10 |
license: gpl-3.0
|
11 |
---
|
12 |
|
13 |
+
The [GPR1200 Dataset](https://www.kaggle.com/datasets/mathurinache/gpr1200-dataset) is licensed under [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). This Space uses 10,000 images (~83.3%) picked at random from that dataset.
|
app.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import gradio as gr
|
3 |
+
from PIL import Image
|
4 |
+
from transformers import AutoProcessor, AutoModel, AutoTokenizer
|
5 |
+
import torch
|
6 |
+
import faiss
|
7 |
+
import glob
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
# Single source of truth for the checkpoint id: the model, its image processor
# and its tokenizer must all come from the same checkpoint.
MODEL_NAME = "google/siglip-base-patch16-256-multilingual"

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = AutoModel.from_pretrained(MODEL_NAME).to(device)
processor = AutoProcessor.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Dimensionality used for the FAISS index, taken from the vision tower config.
num_dimensions = model.vision_model.config.hidden_size  # 768
# Number of nearest neighbours requested from the index for every query.
num_k = 30
|
18 |
+
|
19 |
+
def preprocess_images(pathname="images/*", index_file="index.faiss",
                      image_filenames_file="image_filenames.txt"):
    """Embed every image matching *pathname* and persist a FAISS index.

    Each image is converted to RGB, embedded with the module-level model and
    L2-normalised, so that inner-product search on the index behaves like
    cosine similarity. The index and the list of filenames (one per line, in
    index order) are written to disk.

    Args:
        pathname: Glob pattern selecting the images to index.
        index_file: Path the FAISS index is written to.
        image_filenames_file: Path the filename list is written to.

    Returns:
        A ``(index, image_filenames)`` tuple; ``image_filenames[i]`` is the
        file whose embedding is stored at position ``i`` of ``index``.

    Raises:
        ValueError: If *pathname* matches no files.
        SystemExit: With status 1 if any image fails to load or embed.
    """
    print("Preprocessing images...")
    # Sorted so that the index layout is reproducible across runs and machines
    # (glob itself returns files in arbitrary order).
    candidate_files = sorted(glob.glob(pathname))
    if not candidate_files:
        # Fail with a clear message instead of the opaque error that
        # np.concatenate raises on an empty list.
        raise ValueError(f"No images found matching {pathname!r}")

    index = faiss.IndexFlatIP(num_dimensions)  # Inner Product (IP) similarity.
    image_filenames = []
    image_features = []
    for image_filename in candidate_files:
        try:
            # The context manager releases the file handle; convert() returns
            # an independent in-memory copy that stays valid afterwards.
            with Image.open(image_filename) as image_raw:
                image_rgb = image_raw.convert('RGB')
            inputs = processor(images=image_rgb, return_tensors="pt").to(device)
            with torch.no_grad():
                image_embedding = model.get_image_features(**inputs).to("cpu")
                image_embedding_n = image_embedding / image_embedding.norm(p=2, dim=-1, keepdim=True)
            image_features.append(image_embedding_n.numpy())
            # Recorded only after a successful embedding so that filenames and
            # feature rows always stay aligned.
            image_filenames.append(image_filename)
        except Exception as e:
            # Abort on the first bad image, as a partial index would be
            # silently incomplete.
            print(f"Error processing {image_filename}")
            print(e)
            raise SystemExit(1) from e

    print("Indexing images...")
    image_features = np.concatenate(image_features, axis=0)
    index.add(image_features)

    print("Saving index...")
    faiss.write_index(index, index_file)
    with open(image_filenames_file, "w") as f:
        f.writelines(image_filename + "\n" for image_filename in image_filenames)

    print("Preprocessing complete.")
    return index, image_filenames
|
52 |
+
|
53 |
+
def load_processed_images(index_file="index.faiss", image_filenames_file="image_filenames.txt"):
    """Load a previously saved FAISS index and its filename list from disk.

    Args:
        index_file: Path of the FAISS index to read.
        image_filenames_file: Text file with one image path per line.

    Returns:
        A ``(index, image_filenames)`` tuple, where ``image_filenames`` holds
        the lines of *image_filenames_file* with surrounding whitespace removed.
    """
    print("Loading index...")
    loaded_index = faiss.read_index(index_file)
    with open(image_filenames_file) as listing:
        raw_lines = listing.readlines()
    stripped_names = [line.strip() for line in raw_lines]
    return loaded_index, stripped_names
|
60 |
+
|
61 |
+
@torch.no_grad()
def search_using_text(text):
    """Return gallery entries for the indexed images that best match *text*.

    The query is embedded with the model's text tower, L2-normalised and
    looked up in the module-level FAISS ``index``. Each hit's inner-product
    score is mapped to a probability with the model's logit scale and bias
    followed by a sigmoid.

    Args:
        text: Query string entered by the user.

    Returns:
        A list of ``(PIL.Image, "xx.xx%")`` tuples with at most ``num_k``
        entries; an empty list when the query is blank.
    """
    # A blank query carries no information, so skip the model and the search.
    if not text or not text.strip():
        return []

    # truncation=True cuts over-long queries to the tokenizer's maximum length
    # instead of handing the model a sequence longer than it was padded for.
    inputs = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt").to(device)
    text_features = model.get_text_features(**inputs).to("cpu")
    text_features_n = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
    text_features_n = text_features_n.numpy()

    D, I = index.search(text_features_n, num_k)

    # Score all hits at once: sigmoid(similarity * exp(logit_scale) + logit_bias).
    scale = model.logit_scale.exp().cpu()
    bias = model.logit_bias.cpu()
    probabilities = torch.sigmoid(torch.from_numpy(D[0]) * scale + bias).tolist()

    result = []
    for score_probability, idx in zip(probabilities, I[0]):
        # FAISS pads the labels with -1 when the index holds fewer than num_k
        # vectors; indexing with -1 would wrongly return the last image.
        if idx < 0:
            continue
        found_image = Image.open(image_filenames[idx])
        found_image.load()  # read the pixel data now rather than lazily
        result.append((found_image, "{:.2f}%".format(score_probability*100)))

    return result
|
81 |
+
|
82 |
+
@torch.no_grad()
def search_using_image(image):
    """Return the indexed images most similar to the query *image*.

    The query is converted to RGB, embedded with the model's vision tower,
    L2-normalised and looked up in the module-level FAISS ``index``.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            nothing was supplied.

    Returns:
        A list of ``PIL.Image`` objects with at most ``num_k`` entries; an
        empty list when *image* is ``None``.
    """
    # The search button can be pressed before any image is provided; return no
    # results instead of crashing inside Image.fromarray.
    if image is None:
        return []

    image = Image.fromarray(image)
    image_rgb = image.convert('RGB')
    inputs = processor(images=image_rgb, return_tensors="pt").to(device)

    image_embedding = model.get_image_features(**inputs).to("cpu")
    image_embedding_n = image_embedding / image_embedding.norm(p=2, dim=-1, keepdim=True)
    image_embedding_n = image_embedding_n.numpy()

    # Only the labels are needed here; the similarity scores are not displayed.
    _, I = index.search(image_embedding_n, num_k)

    result = []
    for idx in I[0]:
        # FAISS pads the labels with -1 when the index holds fewer than num_k
        # vectors; indexing with -1 would wrongly return the last image.
        if idx < 0:
            continue
        found_image = Image.open(image_filenames[idx])
        found_image.load()  # read the pixel data now rather than lazily
        result.append(found_image)

    return result
|
101 |
+
|
102 |
+
if __name__ == "__main__":
    import os

    # Embedding every image is expensive, so reuse the cached index and
    # filename list when both exist and rebuild them only when one is missing.
    # Delete "index.faiss" to force re-indexing after the image set changes.
    if os.path.isfile("index.faiss") and os.path.isfile("image_filenames.txt"):
        index, image_filenames = load_processed_images()
    else:
        index, image_filenames = preprocess_images()

    with gr.Blocks() as demo:
        gr.Markdown("# Image Search Engine Demo")
        with gr.Row(equal_height=False):
            # Left column: description plus one tab per query type.
            with gr.Column():
                gr.Markdown("This app is powered by [SigLIP](https://huggingface.co/google/siglip-base-patch16-256-multilingual) with multilingual support and [GPR1200 Dataset](https://www.kaggle.com/datasets/mathurinache/gpr1200-dataset) image contents. Enter your query in the text box or upload an image to search for similar images.")
                with gr.Tab("Text-Image Search"):
                    text_input = gr.Textbox(label="Type a word or a sentence", placeholder="a frog waiting on a rock")
                    search_using_text_btn = gr.Button("Search with text", scale=0)

                with gr.Tab("Image-Image Search"):
                    image_input = gr.Image()
                    search_using_image_btn = gr.Button("Search with image", scale=0)

            # Right side: results gallery shared by both search modes.
            gallery = gr.Gallery(label="Generated images", show_label=False,
                                 elem_id="gallery", columns=3,
                                 object_fit="contain", interactive=False, scale=3)

        search_using_text_btn.click(search_using_text, inputs=text_input, outputs=gallery)
        search_using_image_btn.click(search_using_image, inputs=image_input, outputs=gallery)

    demo.launch(share=False)
|
image_filenames.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/0_430b273cb7915d83.jpg
ADDED
Git LFS Details
|
images/0_618577ba46bc3d04.jpg
ADDED
Git LFS Details
|
images/0_8684223b404f7e47.jpg
ADDED
Git LFS Details
|
images/0_c8f198d7da690cc8.jpg
ADDED
Git LFS Details
|
images/0_e7ec496a16bfed05.jpg
ADDED
Git LFS Details
|
images/0_e8b9b9a19ebf6a0d.jpg
ADDED
Git LFS Details
|
images/0_ff075584fa3ac3fe.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm1814797056_1975-7-6_2006.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm2092538624_1975-7-6_2006.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm2210106880_1975-7-6_2009.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm2516756224_1975-7-6_2013.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm3130391296_1975-7-6_2014.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm3375870464_1975-7-6_2011.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm518437632_1975-7-6_2011.jpg
ADDED
Git LFS Details
|
images/1000_nm1265067_rm964988416_1975-7-6_2009.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm1345886720_1978-2-9_2006.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm1602599168_1978-2-9_2010.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm2938931456_1978-2-9_2004.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm3014951936_1978-2-9_2010.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm3094448384_1978-2-9_2009.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm3115615232_1978-2-9_2010.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm323651840_1978-2-9_2009.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm3554462208_1978-2-9_2005.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm4114192896_1978-2-9_2000.jpg
ADDED
Git LFS Details
|
images/1001_nm0118568_rm908588288_1978-2-9_2014.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm1000782336_1991-4-10_2011.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm1784853248_1991-4-10_2011.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm1801630464_1991-4-10_2011.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm1820361728_1991-4-10_2010.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm1835184896_1991-4-10_2011.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm2423361024_1991-4-10_2009.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm2595915264_1991-4-10_2010.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm3253067264_1991-4-10_2013.jpg
ADDED
Git LFS Details
|
images/1002_nm1404488_rm380025344_1991-4-10_2011.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm1588953600_1973-4-3_2009.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm202870272_1973-4-3_2010.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm2100920576_1973-4-3_2009.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm2391973888_1973-4-3_2012.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm3014891520_1973-4-3_2002.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm3628986880_1973-4-3_2015.jpg
ADDED
Git LFS Details
|
images/1003_nm0004395_rm504860160_1973-4-3_2010.jpg
ADDED
Git LFS Details
|
images/1004_nm0788202_rm1421581056_1964-11-27_2007.jpg
ADDED
Git LFS Details
|
images/1004_nm0788202_rm1603177472_1964-11-27_2010.jpg
ADDED
Git LFS Details
|
images/1004_nm0788202_rm1629587200_1964-11-27_2010.jpg
ADDED
Git LFS Details
|