Joe Booth
committed on
Commit 0e61e04
1 Parent(s): 8498cb9
improve image load success rate
app.py
CHANGED
@@ -70,6 +70,14 @@ def base64_to_embedding(embeddings_b64):
     # embeddings = torch.tensor(embeddings)
     return embeddings
 
+def safe_url(url):
+    import urllib.parse
+    url = urllib.parse.quote(url, safe=':/')
+    # if url has two .jpg filenames, take the first one
+    if url.count('.jpg') > 0:
+        url = url.split('.jpg')[0] + '.jpg'
+    return url
+
 def main(
     # input_im,
     embeddings,
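The added `safe_url` helper percent-encodes each result URL (keeping `:` and `/`) and drops anything after the first `.jpg`, which rescues laion URLs that carry query strings or a second filename. A quick sketch of what that does to a sample URL; the URL below is made up for illustration, not taken from the dataset:

```python
import urllib.parse

def safe_url(url):
    # same logic as the helper added in this commit
    url = urllib.parse.quote(url, safe=':/')
    # keep everything up to (and including) the first '.jpg'
    if url.count('.jpg') > 0:
        url = url.split('.jpg')[0] + '.jpg'
    return url

# hypothetical example URL
print(safe_url("https://example.com/cat photo.jpg?size=large.jpg"))
# -> https://example.com/cat%20photo.jpg
```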
@@ -84,17 +92,26 @@ def main(
     for result in results:
         if len(images) >= n_samples:
             break
+        url = safe_url(result["url"])
+        similarty = float("{:.4f}".format(result["similarity"]))
+        title = str(similarty) + ' ' + result["caption"]
+
+        # we could just return the url and the control would take care of the rest
+        # however, if the url returns an error, the page crashes.
+        # images.append((url, title))
+        # continue
         # dowload image
         import requests
         from io import BytesIO
         try:
-            response = requests.get(
+            response = requests.get(url)
             if not response.ok:
                 continue
             bytes = BytesIO(response.content)
             image = Image.open(bytes)
-            image.
-
+            if image.mode != 'RGB':
+                image = image.convert('RGB')
+            images.append((image, title))
         except Exception as e:
             print(e)
     return images
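Downloading each result inside the loop, rather than handing raw URLs to the Gallery, is what the commit message refers to: a dead or malformed link is skipped instead of crashing the page. A standalone sketch of the same fetch-and-convert step; the `fetch_image` name and the request timeout are additions made here for illustration, not part of the commit:

```python
# Sketch of the per-result fetch used in main(); the 10 s timeout is an
# assumption added here, the commit calls requests.get(url) without one.
import requests
from io import BytesIO
from PIL import Image

def fetch_image(url, title):
    try:
        response = requests.get(url, timeout=10)
        if not response.ok:
            return None                       # skip results that 404 or error out
        image = Image.open(BytesIO(response.content))
        if image.mode != 'RGB':
            image = image.convert('RGB')      # Gallery-friendly mode
        return (image, title)                 # the Gallery accepts (image, caption) pairs
    except Exception as e:
        print(e)                              # log and skip, never crash the page
        return None
```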
@@ -367,7 +384,7 @@ Try uploading a few images and/or add some text prompts and click generate image
             with gr.Column(scale=3, min_width=200):
                 submit = gr.Button("Search embedding space")
         with gr.Row():
-            output = gr.Gallery(label="
+            output = gr.Gallery(label="Closest images in Laion 5b using kNN", show_label=True)
 
         embedding_base64s_state = gr.State(value=[None for i in range(max_tabs)])
         embedding_power_state = gr.State(value=[1. for i in range(max_tabs)])
@@ -383,7 +400,7 @@ Try uploading a few images and/or add some text prompts and click generate image
 
         # submit.click(main, inputs= [embedding_base64s[0], scale, n_samples, steps, seed], outputs=output)
         submit.click(main, inputs= [average_embedding_base64, n_samples], outputs=output)
-        output.style(grid=
+        output.style(grid=[4], height="auto")
 
         with gr.Row():
            gr.Markdown(
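For the Gallery wiring, the app uses the Gradio 3.x styling API: the click handler returns a list of (image, caption) pairs and `output.style(grid=[4], height="auto")` lays them out four per row. A minimal self-contained sketch under that assumption; `dummy_search` is a stand-in invented here in place of `main`:

```python
# Minimal Gradio 3.x sketch of the Gallery wiring; dummy_search stands in for
# main() and just returns (PIL image, caption) pairs the Gallery can render.
import gradio as gr
from PIL import Image

def dummy_search(n_samples):
    img = Image.new('RGB', (64, 64), 'gray')
    return [(img, f"placeholder {i}") for i in range(int(n_samples))]

with gr.Blocks() as demo:
    n_samples = gr.Slider(1, 16, value=4, step=1, label="n_samples")
    submit = gr.Button("Search embedding space")
    output = gr.Gallery(label="Closest images in Laion 5b using kNN", show_label=True)
    submit.click(dummy_search, inputs=[n_samples], outputs=output)
    output.style(grid=[4], height="auto")   # Gradio 3.x styling call, as in the commit

demo.launch()
```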
@@ -394,7 +411,7 @@ My interest is to use CLIP for image/video understanding (see [CLIP_visual-spati
 ### Initial Features
 
 - Combine up to 10 Images and/or text inputs to create an average embedding space.
-- Search the laion 5b
+- Search the laion 5b images via a kNN search
 
 ### Known limitations
 
@@ -402,7 +419,7 @@ My interest is to use CLIP for image/video understanding (see [CLIP_visual-spati
 
 ### Acknowledgements
 
-- I heavily build on [clip-retrieval](https://rom1504.github.io/clip-retrieval/) and use their API. Please [
+- I heavily build on [clip-retrieval](https://rom1504.github.io/clip-retrieval/) and use their API. Please [cite](https://github.com/rom1504/clip-retrieval#citation) the authors if you use this work.
 - [CLIP](https://openai.com/blog/clip/)
 - [Stable Diffusion](https://github.com/CompVis/stable-diffusion)
 