vlm-playground

Runtime error

App Files Files Community

vlm-playground / app.py

edbeeching HF staff

Update app.py

17ec0aa verified 8 months ago

raw

history blame contribute delete

2.86 kB

	import gradio as gr
	from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
	from threading import Thread
	import re
	import time
	from PIL import Image
	import torch
	import spaces
	import os
	from huggingface_hub import login

	# Authenticate against the Hugging Face Hub with the token from the environment.
	login(token=os.environ["HF_TOKEN"])

	# The checkpoint to serve and its revision are both read from the environment.
	MODEL_ID = os.environ["MODEL_ID"]
	REVISION = os.environ["MODEL_REVISION"]

	processor = LlavaProcessor.from_pretrained(MODEL_ID, revision=REVISION)

	# Load the weights in float16, then move the model to the first CUDA device.
	model = LlavaForConditionalGeneration.from_pretrained(
	    MODEL_ID,
	    revision=REVISION,
	    torch_dtype=torch.float16,
	    low_cpu_mem_usage=True,
	).to("cuda:0")

	@spaces.GPU
	def bot_streaming(message, history):
	    """Stream the model's reply to a multimodal chat message.

	    Args:
	        message: Mapping with a ``"text"`` entry (the user's question) and a
	            ``"files"`` entry (uploads for this turn); each upload is read
	            via its ``"path"`` key.
	        history: Sequence of past turns. A turn whose first element is a
	            tuple is treated as an image turn, and that tuple's first item
	            is used as the image path.

	    Yields:
	        The text generated so far, growing as new chunks arrive from the
	        streamer.

	    Raises:
	        gr.Error: If neither this turn nor the history provides an image.
	    """
	    print(message)

	    # Start from "no image" so the check below is well-defined even when
	    # nothing was uploaded now or in any earlier turn.
	    image = None
	    if message["files"]:
	        image = message["files"][-1]["path"]
	    else:
	        # No image uploaded for this turn: look through the past turns for
	        # images (kept inside tuples) and keep the last one found.
	        for hist in history:
	            if isinstance(hist[0], tuple) and hist[0]:
	                image = hist[0][0]

	    if image is None:
	        # The error must be raised (not merely constructed) to reach the UI
	        # and to stop before Image.open() is called with None.
	        raise gr.Error("You need to upload an image for LLaVA to work.")

	    prompt = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{message['text']}\nASSISTANT:"
	    image = Image.open(image).convert("RGB")
	    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")

	    streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
	    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512)

	    # Generation runs in a background thread; this generator consumes the
	    # streamer concurrently.
	    thread = Thread(target=model.generate, kwargs=generation_kwargs)
	    thread.start()

	    # NOTE(review): the streamed text appears to start with the decoded
	    # prompt (minus special tokens such as <image>); this image-free copy is
	    # used only for its length, to slice that echo off. Confirm the decoded
	    # prompt really has this exact length.
	    text_prompt = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: \n{message['text']}\nASSISTANT: "
	    prompt_length = len(text_prompt)

	    buffer = ""
	    for new_text in streamer:
	        buffer += new_text
	        generated_text_without_prompt = buffer[prompt_length:]
	        time.sleep(0.04)
	        yield generated_text_without_prompt


	# Multimodal chat UI around bot_streaming. The model ID and revision are
	# changed through the environment settings of the Space (for internal VLMs).
	demo = gr.ChatInterface(
	    fn=bot_streaming,
	    title="VLM Playground",
	    # Each example pairs a question with one bundled image file.
	    examples=[
	        {"text": question, "files": [image_file]}
	        for question, image_file in (
	            ("What is on the flower?", "./bee.jpg"),
	            ("How to make this pastry?", "./baklava.png"),
	            ("What is this?", "./pizza2.jpeg"),
	        )
	    ],
	    description="VLM Playground host HuggingFaceH4/vsft-llava-1.5-7b-hf-trl a llava SFT finetune using TRL's SFTTrainer",
	    stop_btn="Stop Generation",
	    multimodal=True,
	)
	demo.launch(debug=True)