moondream2 / app.py
vikhyatk's picture
Update app.py
1322687 verified
raw
history blame
1.85 kB
import spaces
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
import subprocess
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
model_id, trust_remote_code=True, revision=revision,
torch_dtype=torch.bfloat16, device_map={"": "cuda"},
attn_implementation="flash_attention_2"
)
moondream.eval()
@spaces.GPU(duration=10)
def answer_question(img, prompt):
image_embeds = moondream.encode_image(img)
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
thread = Thread(
target=moondream.answer_question,
kwargs={
"image_embeds": image_embeds,
"question": prompt,
"tokenizer": tokenizer,
"streamer": streamer,
},
)
thread.start()
buffer = ""
for new_text in streamer:
clean_text = re.sub("<$|<END$", "", new_text)
buffer += clean_text
yield buffer
with gr.Blocks() as demo:
gr.Markdown(
"""
# 🌔 moondream2
A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)
"""
)
with gr.Row():
prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
submit = gr.Button("Submit")
with gr.Row():
img = gr.Image(type="pil", label="Upload an Image")
output = gr.TextArea(label="Response")
submit.click(answer_question, [img, prompt], output)
prompt.submit(answer_question, [img, prompt], output)
demo.queue().launch()