IAozora committed • Commit 4ee8f13 • 1 Parent(s): 421c1e9

Initial Demo
Files changed:
- .dockerignore +3 -0
- .gitignore +3 -0
- Dockerfile +25 -0
- README.md +6 -5
- app.py +165 -0
- const.py +60 -0
- download_gcs_object.py +26 -0
- examples/LICENSE.md +8 -0
- examples/cat.jpg +0 -0
- examples/takoyaki.jpg +0 -0
- requirements.txt +6 -0
.dockerignore ADDED
@@ -0,0 +1,3 @@
+.env
+*.json
+.venv/
.gitignore ADDED
@@ -0,0 +1,3 @@
+.venv
+.env
+*.json
Dockerfile ADDED
@@ -0,0 +1,25 @@
+FROM python:3.11-slim
+
+WORKDIR /usr/src/app
+
+COPY . .
+RUN pip install --no-cache-dir -r requirements.txt
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+# Get service account key from HF Spaces' secrets
+# https://huggingface.co/docs/hub/spaces-sdks-docker#buildtime
+RUN --mount=type=secret,id=GCLOUD_SA_JSON,mode=0444,required=true \
+    cat /run/secrets/GCLOUD_SA_JSON > /usr/src/app/credentials.json
+
+RUN --mount=type=secret,id=GCLOUD_OBJECT_SA_JSON,mode=0444,required=true \
+    cat /run/secrets/GCLOUD_OBJECT_SA_JSON > /usr/src/app/credentials_object.json
+
+RUN --mount=type=secret,id=BUCKET_NAME,mode=0444,required=true \
+    --mount=type=secret,id=CLI_OBJECT_NAME,mode=0444,required=true \
+    python download_gcs_object.py --bucket-name $(cat /run/secrets/BUCKET_NAME) --object-name $(cat /run/secrets/CLI_OBJECT_NAME)
+
+RUN --mount=type=secret,id=CLI_OBJECT_NAME,mode=0444,required=true \
+    tar -xf $(cat /run/secrets/CLI_OBJECT_NAME)
+
+CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,11 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: llava-calm2-preview
+emoji: 🎨
+colorFrom: purple
+colorTo: yellow
 sdk: docker
+app_port: 7860
 pinned: false
 ---
 
-
+# HF Gradio Spaces for llava-calm2
app.py ADDED
@@ -0,0 +1,165 @@
+import base64
+import io
+import os
+import subprocess
+from functools import partial
+
+import gradio as gr
+import httpx
+from const import BASE_URL, CLI_COMMAND, CSS, FOOTER, HEADER, MODELS, PLACEHOLDER
+from openai import OpenAI
+from PIL import Image
+
+
+def get_token() -> str:
+    return (
+        subprocess.run(
+            CLI_COMMAND,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            env=os.environ.copy(),
+        )
+        .stdout.decode("utf-8")
+        .strip()
+    )
+
+
+def get_headers(host: str) -> dict:
+    return {
+        "Authorization": f"Bearer {get_token()}",
+        "Host": host,
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+    }
+
+
+def proxy(request: httpx.Request, model_info: dict) -> httpx.Request:
+    request.url = request.url.copy_with(path=model_info["endpoint"])
+    request.headers.update(get_headers(host=model_info["host"]))
+    return request
+
+
+def encode_image_with_pillow(image_path: str) -> str:
+    with Image.open(image_path) as img:
+        img.thumbnail((384, 384))
+        buffered = io.BytesIO()
+        img.convert("RGB").save(buffered, format="JPEG")
+        return base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+
+def call_chat_api(message, history, model_name):
+    if message["files"]:
+        if isinstance(message["files"], dict):
+            image = message["files"]["path"]
+        else:
+            image = message["files"][-1]
+    else:
+        for hist in history:
+            if isinstance(hist[0], tuple):
+                image = hist[0][0]
+
+    img_base64 = encode_image_with_pillow(image)
+
+    history_openai_format = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{img_base64}",
+                    },
+                },
+            ],
+        }
+    ]
+
+    if len(history) == 0:
+        history_openai_format[0]["content"].append(
+            {"type": "text", "text": message["text"]}
+        )
+    else:
+        for human, assistant in history[1:]:
+            if len(history_openai_format) == 1:
+                history_openai_format[0]["content"].append(
+                    {"type": "text", "text": human}
+                )
+            else:
+                history_openai_format.append({"role": "user", "content": human})
+            history_openai_format.append({"role": "assistant", "content": assistant})
+        history_openai_format.append({"role": "user", "content": message["text"]})
+
+    client = OpenAI(
+        api_key="",
+        base_url=BASE_URL,
+        http_client=httpx.Client(
+            event_hooks={
+                "request": [partial(proxy, model_info=MODELS[model_name])],
+            },
+            verify=False,
+        ),
+    )
+
+    stream = client.chat.completions.create(
+        model=f"/data/cyberagent/{model_name}",
+        messages=history_openai_format,
+        temperature=0.2,
+        top_p=1.0,
+        max_tokens=1024,
+        stream=True,
+        extra_body={"repetition_penalty": 1.1},
+    )
+
+    message = ""
+    for chunk in stream:
+        content = chunk.choices[0].delta.content or ""
+        message = message + content
+        yield message
+
+
+def run():
+    chatbot = gr.Chatbot(
+        elem_id="chatbot", placeholder=PLACEHOLDER, scale=1, height=700
+    )
+    chat_input = gr.MultimodalTextbox(
+        interactive=True,
+        file_types=["image"],
+        placeholder="Enter message or upload file...",
+        show_label=False,
+    )
+    with gr.Blocks(css=CSS) as demo:
+        gr.Markdown(HEADER)
+        with gr.Row():
+            model_selector = gr.Dropdown(
+                choices=MODELS.keys(),
+                value=list(MODELS.keys())[0],
+                label="Model",
+            )
+        gr.ChatInterface(
+            fn=call_chat_api,
+            stop_btn="Stop Generation",
+            examples=[
+                [
+                    {
+                        "text": "この画像を詳しく説明してください。",
+                        "files": ["./examples/cat.jpg"],
+                    },
+                ],
+                [
+                    {
+                        "text": "この料理はどんな味がするか詳しく教えてください。",
+                        "files": ["./examples/takoyaki.jpg"],
+                    },
+                ],
+            ],
+            multimodal=True,
+            textbox=chat_input,
+            chatbot=chatbot,
+            additional_inputs=[model_selector],
+        )
+        gr.Markdown(FOOTER)
+    demo.queue().launch(share=False)
+
+
+if __name__ == "__main__":
+    run()
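The least obvious piece of app.py is how an OpenAI-compatible client is pointed at a private, token-authenticated backend: an httpx "request" event hook rewrites every outgoing request's path and injects fresh auth headers before the request is sent. The snippet below is a minimal sketch of that mechanism only, not part of the Space; the endpoint path, host, and bearer token are placeholder values, and the hook is invoked directly so nothing touches the network.

```python
from functools import partial

import httpx


def proxy(request: httpx.Request, model_info: dict) -> httpx.Request:
    # Mutate the request in place: swap the URL path and add auth/Host headers.
    request.url = request.url.copy_with(path=model_info["endpoint"])
    request.headers.update(
        {"Authorization": "Bearer <placeholder-token>", "Host": model_info["host"]}
    )
    return request


# Placeholder routing info; the real values come from const.MODELS / Space secrets.
model_info = {"endpoint": "/v1/chat/completions", "host": "inference.example.com"}

# app.py installs the hook like this, so the OpenAI SDK never has to know
# that the request is being rewritten:
client = httpx.Client(event_hooks={"request": [partial(proxy, model_info=model_info)]})

# Demonstrate the rewrite without sending anything over the network.
request = httpx.Request("POST", "https://example.com/original/path")
proxy(request, model_info=model_info)
print(request.url)               # https://example.com/v1/chat/completions
print(request.headers["host"])   # inference.example.com
```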
const.py ADDED
@@ -0,0 +1,60 @@
+import os
+from pathlib import Path
+
+SECRET_PREFIX = os.environ.get("SECRET_PREFIX", "")
+PROJECT_ID = os.environ.get("PROJECT_ID", "")
+ROLE_SUBJECT = os.environ.get("ROLE_SUBJECT", "")
+CREDENTIALS = os.environ.get("CREDENTIALS", "")
+os.environ[SECRET_PREFIX + "PROJECT_ID"] = PROJECT_ID
+os.environ[SECRET_PREFIX + "ROLE_SUBJECT"] = ROLE_SUBJECT
+os.environ[SECRET_PREFIX + "CREDENTIALS"] = CREDENTIALS
+
+BASE_URL = os.environ.get("GCLOUD_BASE_URL", "")
+BASE_ENDPOINT = os.environ.get("GCLOUD_ENDPOINT", "")
+CHATTY_ENDPOINT = os.environ.get("GCLOUD_CHATTY_ENDPOINT", "")
+BASE_HOST = os.environ.get("GCLOUD_HOST", "")
+CHATTY_HOST = os.environ.get("GCLOUD_CHATTY_HOST", "")
+CLI_COMMAND_NAME = os.environ.get("CLI_COMMAND_NAME", "")
+CLI_ARG1 = os.environ.get("CLI_ARG1", "")
+CLI_ARG2 = os.environ.get("CLI_ARG2", "")
+ROOT_DIR = Path(__file__).parent.absolute()
+GCLOUD_BIN = str(ROOT_DIR / CLI_COMMAND_NAME)
+CLI_COMMAND = [GCLOUD_BIN, CLI_ARG1, CLI_ARG2]
+
+MODELS = {
+    "llava-calm2-siglip-chatty": {"endpoint": CHATTY_ENDPOINT, "host": CHATTY_HOST},
+    "llava-calm2-siglip": {"endpoint": BASE_ENDPOINT, "host": BASE_HOST},
+}
+
+HEADER = """
+# LLaVA-CALM2-SigLIP
+LLaVA-CALM2-SigLIPは、calm2-7b-chatとsiglip-so400m-patch14-384からファインチューニングされたLLaVAモデルです。
+## Models
+- **llava-calm2-siglip**: 公開データを用いて学習されたVLM
+- **llava-calm2-siglip-chatty**: よりチャットに最適化するように学習したVLM
+"""
+
+FOOTER = """
+## Terms of Use
+Please note that by using this service, you agree to the following terms: This model is provided for research purposes only. CyberAgent expressly disclaims any liability for direct, indirect, special, incidental, or consequential damages, as well as for any losses that may result from using this model, regardless of the outcomes. It is essential for users to fully understand these limitations before employing the model.
+
+## License
+The service is a research preview intended for non-commercial use only.
+"""
+
+PLACEHOLDER = """
+<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+    <img src="https://d23iyfk1a359di.cloudfront.net/files/topics/26317_ext_03_0.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
+    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLaVA-CALM2-SigLIP</h1>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">LLaVA-CALM2-SigLIP is a LLaVA model fine-tuned from calm2-7b-chat and siglip-so400m-patch14-384</p>
+</div>
+"""
+
+CSS = """
+#chatbot {
+    height: auto !important;
+    max-height: none !important;
+    overflow: auto !important;
+    flex-grow: 1 !important;
+}
+"""
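const.py contains no literal configuration: every URL, host, and CLI token it exposes is read from environment variables that the Space supplies as secrets. The snippet below is a hypothetical local smoke test, not part of the repo; every value in it is a made-up placeholder, since the real endpoints, hosts, and bundled CLI name are intentionally kept out of the source.

```python
import os

# Made-up placeholder values; the real ones are Hugging Face Space secrets.
os.environ.update(
    {
        "GCLOUD_BASE_URL": "https://inference.example.com",
        "GCLOUD_ENDPOINT": "/v1/models/base:chat",
        "GCLOUD_CHATTY_ENDPOINT": "/v1/models/chatty:chat",
        "GCLOUD_HOST": "base.example.internal",
        "GCLOUD_CHATTY_HOST": "chatty.example.internal",
        "CLI_COMMAND_NAME": "dummy-cli",
        "CLI_ARG1": "--print-token",
        "CLI_ARG2": "--quiet",
    }
)

import const  # noqa: E402  # const.py reads the variables set above at import time

print(list(const.MODELS))  # ['llava-calm2-siglip-chatty', 'llava-calm2-siglip']
print(const.CLI_COMMAND)   # ['<repo>/dummy-cli', '--print-token', '--quiet']
```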
download_gcs_object.py ADDED
@@ -0,0 +1,26 @@
+import argparse
+import json
+
+from google.cloud import storage
+from google.oauth2 import service_account
+
+
+def download_gcs_object(bucket_name: str, object_name: str):
+    with open("/usr/src/app/credentials_object.json", "r") as f:
+        credentials_dict = json.load(f)
+    credentials = service_account.Credentials.from_service_account_info(
+        credentials_dict
+    )
+    client = storage.Client(
+        credentials=credentials, project=credentials_dict["project_id"]
+    )
+    blob = client.bucket(bucket_name).blob(object_name)
+    blob.download_to_filename(object_name)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bucket-name", type=str, required=True)
+    parser.add_argument("--object-name", type=str, required=True)
+    args = parser.parse_args()
+    download_gcs_object(args.bucket_name, args.object_name)
examples/LICENSE.md ADDED
@@ -0,0 +1,8 @@
+## Licenses
+
+
+| title | filename | source_url | license |
+|-------|----------|------------|---------|
+| \[フリー写真\] 窓の外を見ている猫の横顔 | cat.jpg | https://publicdomainq.net/cat-watch-animal-photo-0079232/ | public domain |
+| \[フリー写真\] 食べ物のプレートが乗った木製のテーブル | takoyaki.jpg | https://unsplash.com/photos/LipkIP4fXbM | unsplash |
+
examples/cat.jpg
ADDED
examples/takoyaki.jpg
ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio
+httpx
+openai
+pillow
+google-cloud-storage
+google-auth