nitinsurya committed
Commit 04bef66
1 Parent(s): 6f32cf9

Use the Facebook MetaCLIP model instead of the sentence-transformers CLIP model.

Files changed (5):
  1. Dockerfile +10 -3
  2. app.py +29 -10
  3. appV1.py +30 -0
  4. poetry.lock +0 -0
  5. pyproject.toml +23 -0
Dockerfile CHANGED
@@ -1,14 +1,21 @@
 FROM python:3.11-slim
 
+# Install Poetry
+RUN pip install poetry==1.7.1
+RUN poetry config virtualenvs.create false
+
 ARG GRADIO_SERVER_PORT=7860
 ENV GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}
 
 WORKDIR /app
 
-COPY requirements.txt /app/
-
-RUN pip install -r /app/requirements.txt
+COPY ./pyproject.toml ./poetry.lock* /app/
+
+# Allow installing dev dependencies to run tests
+ARG INSTALL_DEV=true
+RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
 
 COPY app.py /app/
 
 CMD ["python", "/app/app.py"]
+# CMD ["gradio", "/app/app.py"]
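
Note on the new Dockerfile: the image now installs dependencies with Poetry instead of a requirements.txt, and since the container is single-purpose, `poetry config virtualenvs.create false` installs packages into the system interpreter. The INSTALL_DEV build argument defaults to true; a production build can skip dev dependencies with `docker build --build-arg INSTALL_DEV=false .` (standard Docker flags; the image tag is up to the builder). One caveat worth verifying: `--no-dev` was deprecated in Poetry 1.2 in favor of `--without dev`, so the else branch may warn or fail under the pinned Poetry 1.7.1.
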
app.py CHANGED
@@ -2,16 +2,26 @@ import gradio as gr
 import numpy as np
 from PIL import Image
 from pathlib import Path
-from sentence_transformers import SentenceTransformer
+import torch
+from transformers import CLIPProcessor, CLIPModel
+
+
+MODEL_NAME = "facebook/metaclip-b32-400m"
 
 cache_path = Path('/app/cache')
+if not cache_path.exists():
+    cache_path = None
 
-if cache_path.exists():
-    print('Cache folder exists, loading from cache')
-    model = SentenceTransformer('clip-ViT-B-32', cache_folder=cache_path)
-else:
-    print('Cache folder does not exist, loading from web')
-    model = SentenceTransformer('clip-ViT-B-32')
+
+def get_clip_model_and_processor(model_name: str, cache_path: Path = None):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if cache_path:
+        model = CLIPModel.from_pretrained(model_name, cache_dir=str(cache_path)).to(device)
+        processor = CLIPProcessor.from_pretrained(model_name, cache_dir=str(cache_path))
+    else:
+        model = CLIPModel.from_pretrained(model_name).to(device)
+        processor = CLIPProcessor.from_pretrained(model_name)
+    return model.eval(), processor
 
 
 def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
@@ -19,12 +29,21 @@ def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
         return []
 
     if img is not None:
-        embedding = model.encode(sentences=[Image.fromarray(img)], batch_size=128)
+        embedding = CLIP_MODEL.get_image_features(
+            **CLIP_PROCESSOR(images=[Image.fromarray(img)], return_tensors="pt", padding=True).to(
+                CLIP_MODEL.device
+            )
+        )
     else:
-        embedding = model.encode(sentences=[txt], batch_size=128)
+        embedding = CLIP_MODEL.get_text_features(
+            **CLIP_PROCESSOR(text=[txt], return_tensors="pt", padding=True).to(
+                CLIP_MODEL.device
+            )
+        )
 
-    return embedding
+    return embedding.detach().cpu().numpy()
 
+CLIP_MODEL, CLIP_PROCESSOR = get_clip_model_and_processor(MODEL_NAME, cache_path=cache_path)
 
 demo = gr.Interface(fn=image_to_embedding, inputs=["image", "textbox"], outputs="textbox", cache_examples=True)
 demo.launch(server_name="0.0.0.0")
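
Note on the new app.py: `get_image_features` and `get_text_features` return raw (unnormalized) MetaCLIP features, so callers comparing image and text embeddings should L2-normalize before taking a dot product. A minimal sanity-check sketch against the same checkpoint, outside the Gradio app (the dummy image and captions are illustrative, not part of the commit):

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Same checkpoint app.py loads.
model = CLIPModel.from_pretrained("facebook/metaclip-b32-400m").eval()
processor = CLIPProcessor.from_pretrained("facebook/metaclip-b32-400m")

# Dummy inputs: a solid red image and two candidate captions.
img = Image.new("RGB", (224, 224), (255, 0, 0))
texts = ["a red square", "a photo of a dog"]

with torch.no_grad():
    img_emb = model.get_image_features(**processor(images=[img], return_tensors="pt"))
    txt_emb = model.get_text_features(**processor(text=texts, return_tensors="pt", padding=True))

# Cosine similarity: L2-normalize, then dot product.
img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
txt_emb = txt_emb / txt_emb.norm(dim=-1, keepdim=True)
print((img_emb @ txt_emb.T).numpy())  # "a red square" should score higher
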
appV1.py ADDED
@@ -0,0 +1,30 @@
+import gradio as gr
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+
+cache_path = Path('/app/cache')
+
+if cache_path.exists():
+    print('Cache folder exists, loading from cache')
+    model = SentenceTransformer('clip-ViT-B-32', cache_folder=cache_path)
+else:
+    print('Cache folder does not exist, loading from web')
+    model = SentenceTransformer('clip-ViT-B-32')
+
+
+def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
+    if img is None and not txt:
+        return []
+
+    if img is not None:
+        embedding = model.encode(sentences=[Image.fromarray(img)], batch_size=128)
+    else:
+        embedding = model.encode(sentences=[txt], batch_size=128)
+
+    return embedding
+
+
+demo = gr.Interface(fn=image_to_embedding, inputs=["image", "textbox"], outputs="textbox", cache_examples=True)
+demo.launch(server_name="0.0.0.0")
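
Note: appV1.py preserves the previous sentence-transformers implementation for reference. Both checkpoints are ViT-B/32 variants and should produce embeddings of the same dimensionality (512), but they come from different training runs, so any vectors indexed with the old model need to be re-embedded rather than mixed with MetaCLIP outputs.
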
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,23 @@
+[tool.poetry]
+name = "clip-embedding"
+version = "0.1.0"
+description = ""
+authors = ["Nitin Surya <nitin@dubverse.ai>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+gradio = "^4.16.0"
+numpy = "^1.26.3"
+pillow = "^10.2.0"
+transformers = "^4.37.2"
+torch = {version = "^2.2.0+cpu", source = "pytorch-cpu"}
+
+[[tool.poetry.source]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "secondary"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
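
Note: the `[[tool.poetry.source]]` entry points Poetry at the CPU-only PyTorch wheel index, and `priority = "secondary"` keeps PyPI as the primary source, so only the `torch = "^2.2.0+cpu"` pin resolves from download.pytorch.org. This keeps multi-gigabyte CUDA wheels out of the python:3.11-slim image.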