Joe Booth committed on
Commit
0e61e04
·
1 Parent(s): 8498cb9

improve image load success rate

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -70,6 +70,14 @@ def base64_to_embedding(embeddings_b64):
70
  # embeddings = torch.tensor(embeddings)
71
  return embeddings
72
 
 
 
 
 
 
 
 
 
73
  def main(
74
  # input_im,
75
  embeddings,
@@ -84,17 +92,26 @@ def main(
84
  for result in results:
85
  if len(images) >= n_samples:
86
  break
 
 
 
 
 
 
 
 
87
  # dowload image
88
  import requests
89
  from io import BytesIO
90
  try:
91
- response = requests.get(result["url"])
92
  if not response.ok:
93
  continue
94
  bytes = BytesIO(response.content)
95
  image = Image.open(bytes)
96
- image.title = str(result["similarity"]) + ' ' + result["caption"]
97
- images.append(image)
 
98
  except Exception as e:
99
  print(e)
100
  return images
@@ -367,7 +384,7 @@ Try uploading a few images and/or add some text prompts and click generate image
367
  with gr.Column(scale=3, min_width=200):
368
  submit = gr.Button("Search embedding space")
369
  with gr.Row():
370
- output = gr.Gallery(label="Generated variations")
371
 
372
  embedding_base64s_state = gr.State(value=[None for i in range(max_tabs)])
373
  embedding_power_state = gr.State(value=[1. for i in range(max_tabs)])
@@ -383,7 +400,7 @@ Try uploading a few images and/or add some text prompts and click generate image
383
 
384
  # submit.click(main, inputs= [embedding_base64s[0], scale, n_samples, steps, seed], outputs=output)
385
  submit.click(main, inputs= [average_embedding_base64, n_samples], outputs=output)
386
- output.style(grid=2)
387
 
388
  with gr.Row():
389
  gr.Markdown(
@@ -394,7 +411,7 @@ My interest is to use CLIP for image/video understanding (see [CLIP_visual-spati
394
  ### Initial Features
395
 
396
  - Combine up to 10 Images and/or text inputs to create an average embedding space.
397
- - Search the laion 5b immages via a knn search
398
 
399
  ### Known limitations
400
 
@@ -402,7 +419,7 @@ My interest is to use CLIP for image/video understanding (see [CLIP_visual-spati
402
 
403
  ### Acknowledgements
404
 
405
- - I heavily build on [clip-retrieval](https://rom1504.github.io/clip-retrieval/) and use their API. Please [citate](https://github.com/rom1504/clip-retrieval#citation) the authors if you use this work.
406
  - [CLIP](https://openai.com/blog/clip/)
407
  - [Stable Diffusion](https://github.com/CompVis/stable-diffusion)
408
 
 
70
  # embeddings = torch.tensor(embeddings)
71
  return embeddings
72
 
73
def safe_url(url):
    """Return *url* percent-encoded so it can be requested reliably.

    Characters that are not legal in a URL (spaces, non-ASCII, ...) are
    percent-encoded. URL delimiters and existing ``%XX`` escapes are left
    untouched, so query strings survive and an already-encoded URL is not
    encoded a second time.

    Args:
        url: Image URL string to sanitize.

    Returns:
        The sanitized URL. When the URL contains more than one ``.jpg``,
        everything after the first ``.jpg`` is dropped.
    """
    import urllib.parse

    # '%' stays safe so an existing escape such as '%20' is not rewritten to
    # '%2520'; the reserved delimiters stay safe so '?a=b&c=d' is preserved.
    url = urllib.parse.quote(url, safe=":/?#[]@!$&'()*+,;=%~")
    # if url has two .jpg filenames, take the first one
    head, marker, tail = url.partition('.jpg')
    if marker and '.jpg' in tail:
        url = head + marker
    return url
80
+
81
  def main(
82
  # input_im,
83
  embeddings,
 
92
  for result in results:
93
  if len(images) >= n_samples:
94
  break
95
+ url = safe_url(result["url"])
96
+ similarty = float("{:.4f}".format(result["similarity"]))
97
+ title = str(similarty) + ' ' + result["caption"]
98
+
99
+ # we could just return the url and the control would take care of the rest
100
+ # however, if the url returns an error, the page crashes.
101
+ # images.append((url, title))
102
+ # continue
103
  # dowload image
104
  import requests
105
  from io import BytesIO
106
  try:
107
+ response = requests.get(url)
108
  if not response.ok:
109
  continue
110
  bytes = BytesIO(response.content)
111
  image = Image.open(bytes)
112
+ if image.mode != 'RGB':
113
+ image = image.convert('RGB')
114
+ images.append((image, title))
115
  except Exception as e:
116
  print(e)
117
  return images
 
384
  with gr.Column(scale=3, min_width=200):
385
  submit = gr.Button("Search embedding space")
386
  with gr.Row():
387
+ output = gr.Gallery(label="Closest images in Laion 5b using kNN", show_label=True)
388
 
389
  embedding_base64s_state = gr.State(value=[None for i in range(max_tabs)])
390
  embedding_power_state = gr.State(value=[1. for i in range(max_tabs)])
 
400
 
401
  # submit.click(main, inputs= [embedding_base64s[0], scale, n_samples, steps, seed], outputs=output)
402
  submit.click(main, inputs= [average_embedding_base64, n_samples], outputs=output)
403
+ output.style(grid=[4], height="auto")
404
 
405
  with gr.Row():
406
  gr.Markdown(
 
411
  ### Initial Features
412
 
413
  - Combine up to 10 Images and/or text inputs to create an average embedding space.
414
+ - Search the laion 5b images via a kNN search
415
 
416
  ### Known limitations
417
 
 
419
 
420
  ### Acknowledgements
421
 
422
+ - I heavily build on [clip-retrieval](https://rom1504.github.io/clip-retrieval/) and use their API. Please [cite](https://github.com/rom1504/clip-retrieval#citation) the authors if you use this work.
423
  - [CLIP](https://openai.com/blog/clip/)
424
  - [Stable Diffusion](https://github.com/CompVis/stable-diffusion)
425