import base64
import hmac
import os
import re
from io import BytesIO
from threading import Thread

import gradio as gr
import torch
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM


# Shared secret that callers must present with every request. It is read from
# the SECRET_TOKEN environment variable and falls back to 'default_secret'
# when that variable is not set.
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')

# Matches the "data:image/<type>;base64," header of a data URI (png, jpeg,
# jpg or webp only) so it can be removed before the payload is decoded.
data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')


def readb64(b64):
    """Decode a base64 string (optionally a data URI) into a PIL image.

    Args:
        b64: Base64-encoded image bytes. Any ``data:image/...;base64,``
            header matched by ``data_uri_pattern`` is removed before decoding.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    payload = data_uri_pattern.sub("", b64)
    raw_bytes = base64.b64decode(payload)
    return Image.open(BytesIO(raw_bytes))


# Checkpoint and revision to load; the same revision is passed to both the
# tokenizer and the model so the two stay in sync.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code=True lets the checkpoint's own modelling code run.
# device_map={"": "cuda"} assigns the root module to the CUDA device.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()


def answer_question(secret_token, input, prompt):
    """Answer ``prompt`` about a base64-encoded image using moondream.

    Args:
        secret_token: Token supplied by the caller; it must equal
            ``SECRET_TOKEN`` or the request is rejected.
        input: Base64 image string (optionally with a data-URI header),
            decoded with ``readb64``. The name shadows the builtin but is
            kept so the function's interface is unchanged.
        prompt: Question to ask about the image.

    Returns:
        The complete generated answer with leading and trailing whitespace
        removed.

    Raises:
        gr.Error: If ``secret_token`` does not match ``SECRET_TOKEN``.
    """
    # Compare in constant time so response timing does not leak how much of
    # the token matched. Both sides are encoded to bytes because
    # compare_digest only accepts ASCII-only str; "surrogatepass" keeps the
    # encoding from failing on lone surrogates (os.getenv can produce them).
    token_ok = isinstance(secret_token, str) and hmac.compare_digest(
        secret_token.encode("utf-8", "surrogatepass"),
        SECRET_TOKEN.encode("utf-8", "surrogatepass"),
    )
    if not token_ok:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    image_embeds = moondream.encode_image(readb64(input))

    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation runs on a worker thread while this thread drains the
    # streamer that is handed to the model.
    worker = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": tokenizer,
            "streamer": streamer,
        },
    )
    worker.start()

    # Consume every streamed chunk, then wait for the worker to finish.
    answer = "".join(streamer)
    worker.join()

    # str.strip() returns a new string; its result must be the return value
    # (the original call discarded it, so the answer was never stripped).
    return answer.strip()


# Minimal Gradio UI. The full-page overlay hides the widgets from human
# visitors; the textboxes and button still define the inputs and output of
# the `answer_question` click handler.
with gr.Blocks() as demo:
    gr.HTML("""
        <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
        <div style="text-align: center; color: black;">
        <p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
        <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/vikhyatk/moondream2" target="_blank">original space</a>.</p>
        </div>
        </div>""")
    token = gr.Textbox()
    # NOTE: `input` shadows the builtin at module level; the name is kept
    # unchanged so anything referring to it keeps working.
    input = gr.Textbox()
    prompt = gr.Textbox()
    submit = gr.Button()
    output = gr.Textbox()
    submit.click(answer_question, [token, input, prompt], output)

demo.queue().launch()