nitinsurya committed
Commit 04bef66 • Parent(s): 6f32cf9

Using Facebook metaclip model instead of sentence transformers clip.

Files changed:
- Dockerfile +10 -3
- app.py +29 -10
- appV1.py +30 -0
- poetry.lock +0 -0
- pyproject.toml +23 -0
Dockerfile
CHANGED

@@ -1,14 +1,21 @@
 FROM python:3.11-slim
 
+# Install Poetry
+RUN pip install poetry==1.7.1
+RUN poetry config virtualenvs.create false
+
 ARG GRADIO_SERVER_PORT=7860
 ENV GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}
 
 WORKDIR /app
 
-COPY …
+COPY ./pyproject.toml ./poetry.lock* /app/
 
-…
+# Allow installing dev dependencies to run tests
+ARG INSTALL_DEV=true
+RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
 
 COPY app.py /app/
 
-CMD ["python", "/app/app.py"]
+CMD ["python", "/app/app.py"]
+# CMD ["gradio", "/app/app.py"]
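A note on the new build flow (not part of the commit): INSTALL_DEV defaults to true, so a plain docker build also installs the dev dependencies; pass --build-arg INSTALL_DEV=false at build time to get the slimmer runtime install. One caveat worth flagging: Poetry deprecated the --no-dev flag from 1.2 onward in favour of --without dev, so the else branch may warn or fail under the pinned poetry==1.7.1.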
app.py
CHANGED

@@ -2,16 +2,26 @@ import gradio as gr
 import numpy as np
 from PIL import Image
 from pathlib import Path
-from sentence_transformers import SentenceTransformer
+import torch
+from transformers import CLIPProcessor, CLIPModel
+
+
+MODEL_NAME = "facebook/metaclip-b32-400m"
 
 cache_path = Path('/app/cache')
+if not cache_path.exists():
+    cache_path = None
 
-if cache_path.exists():
-    print('Cache folder exists, loading from cache')
-    model = SentenceTransformer('clip-ViT-B-32', cache_folder=cache_path)
-else:
-    print('Cache folder does not exist, loading from web')
-    model = SentenceTransformer('clip-ViT-B-32')
+
+def get_clip_model_and_processor(model_name: str, cache_path: Path = None):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if cache_path:
+        model = CLIPModel.from_pretrained(model_name, cache_dir=str(cache_path)).to(device)
+        processor = CLIPProcessor.from_pretrained(model_name, cache_dir=str(cache_path))
+    else:
+        model = CLIPModel.from_pretrained(model_name).to(device)
+        processor = CLIPProcessor.from_pretrained(model_name)
+    return model.eval(), processor
 
 
 def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
@@ -19,12 +29,21 @@ def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
         return []
 
     if img is not None:
-        embedding = model.encode(sentences=[Image.fromarray(img)], batch_size=128)
+        embedding = CLIP_MODEL.get_image_features(
+            **CLIP_PROCESSOR(images=[Image.fromarray(img)], return_tensors="pt", padding=True).to(
+                CLIP_MODEL.device
+            )
+        )
     else:
-        embedding = model.encode(sentences=[txt], batch_size=128)
+        embedding = CLIP_MODEL.get_text_features(
+            **CLIP_PROCESSOR(text=[txt], return_tensors="pt", padding=True).to(
+                CLIP_MODEL.device
+            )
+        )
 
-    return embedding
+    return embedding.detach().cpu().numpy()
 
+CLIP_MODEL, CLIP_PROCESSOR = get_clip_model_and_processor(MODEL_NAME, cache_path=cache_path)
 
 demo = gr.Interface(fn=image_to_embedding, inputs=["image", "textbox"], outputs="textbox", cache_examples=True)
 demo.launch(server_name="0.0.0.0")
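The rewritten app.py returns raw, unnormalised MetaCLIP embeddings as a numpy array. A minimal downstream sketch (not part of the commit; the placeholder image, the caption, and the cosine step are assumptions following standard CLIP usage) showing how a caller might score image-text similarity with the same checkpoint:

    import numpy as np
    import torch
    from PIL import Image
    from transformers import CLIPModel, CLIPProcessor

    # Same checkpoint app.py loads; cache_dir is omitted here for brevity.
    model = CLIPModel.from_pretrained("facebook/metaclip-b32-400m").eval()
    processor = CLIPProcessor.from_pretrained("facebook/metaclip-b32-400m")

    image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))  # placeholder image
    with torch.no_grad():
        img_emb = model.get_image_features(**processor(images=[image], return_tensors="pt"))
        txt_emb = model.get_text_features(**processor(text=["a black square"], return_tensors="pt", padding=True))

    # CLIP-style similarity: normalise both embeddings, then take the dot product.
    img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
    txt_emb = txt_emb / txt_emb.norm(dim=-1, keepdim=True)
    print((img_emb @ txt_emb.T).item())

One small observation on the committed code: it calls .detach() after the forward pass rather than wrapping it in torch.no_grad(); the numbers are identical, but no_grad also avoids building the autograd graph.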
appV1.py
ADDED

@@ -0,0 +1,30 @@
+import gradio as gr
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+
+cache_path = Path('/app/cache')
+
+if cache_path.exists():
+    print('Cache folder exists, loading from cache')
+    model = SentenceTransformer('clip-ViT-B-32', cache_folder=cache_path)
+else:
+    print('Cache folder does not exist, loading from web')
+    model = SentenceTransformer('clip-ViT-B-32')
+
+
+def image_to_embedding(img: np.ndarray = None, txt: str = None) -> np.ndarray:
+    if img is None and not txt:
+        return []
+
+    if img is not None:
+        embedding = model.encode(sentences=[Image.fromarray(img)], batch_size=128)
+    else:
+        embedding = model.encode(sentences=[txt], batch_size=128)
+
+    return embedding
+
+
+demo = gr.Interface(fn=image_to_embedding, inputs=["image", "textbox"], outputs="textbox", cache_examples=True)
+demo.launch(server_name="0.0.0.0")
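For comparison, the same similarity check against the sentence-transformers pipeline that appV1.py preserves (again a sketch, not part of the commit; util.cos_sim is the library's cosine-similarity helper):

    import numpy as np
    from PIL import Image
    from sentence_transformers import SentenceTransformer, util

    model = SentenceTransformer('clip-ViT-B-32')
    image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))  # placeholder image

    # CLIP models in sentence-transformers accept PIL images directly in encode().
    img_emb = model.encode([image])
    txt_emb = model.encode(["a black square"])
    print(util.cos_sim(img_emb, txt_emb).item())

Note that the two pipelines load different weights (OpenAI's CLIP ViT-B/32 versus MetaCLIP b32-400m), so embeddings produced before and after this commit are not interchangeable.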
poetry.lock
ADDED

The diff for this file is too large to render. See raw diff.
pyproject.toml
ADDED

@@ -0,0 +1,23 @@
+[tool.poetry]
+name = "clip-embedding"
+version = "0.1.0"
+description = ""
+authors = ["Nitin Surya <nitin@dubverse.ai>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+gradio = "^4.16.0"
+numpy = "^1.26.3"
+pillow = "^10.2.0"
+transformers = "^4.37.2"
+torch = {version = "^2.2.0+cpu", source = "pytorch-cpu"}
+
+[[tool.poetry.source]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "secondary"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
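The [[tool.poetry.source]] block points torch at the CPU-only wheel index, which keeps the image far smaller than the default CUDA build; it also means the device = "cuda" if torch.cuda.is_available() else "cpu" branch in app.py will always resolve to "cpu" inside this container. A quick sanity check that the CPU wheel actually landed (a sketch, not part of the commit):

    import torch

    print(torch.__version__)          # ends in "+cpu" for wheels from the cpu index
    print(torch.cuda.is_available())  # False on the CPU-only build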