import spaces
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

# Model and processor initialization
processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
# Load in the checkpoint's native dtype and let accelerate place the weights;
# a default full-precision load followed by .cuda() would not fit a 72B model on one GPU
model = AutoModelForImageTextToText.from_pretrained(
    "Qwen/QVQ-72B-Preview", torch_dtype="auto", device_map="auto"
).eval()
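# NOTE: a minimal alternative loading sketch for memory-constrained GPUs, assuming
# the optional bitsandbytes package is installed (not part of this Space's setup):
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForImageTextToText.from_pretrained(
#       "Qwen/QVQ-72B-Preview",
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   ).eval()
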
# Footer
footer = """
<div style="text-align: center; margin-top: 20px;">
<p>Powered by QVQ-72B Model</p>
</div>
"""
# Vision model function
@spaces.GPU()
def process_image(image, text_input=None):
    if image is None:
        return "Please upload an image first."
    # Convert the NumPy array from gr.Image to a PIL image
    image = Image.fromarray(image).convert("RGB")
    # Qwen2-VL-family processors expect chat-formatted text with an image placeholder,
    # so build the prompt via the processor's chat template
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": text_input or "Describe this image."},
        ],
    }]
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
    # Generate, then decode only the newly generated tokens (drop the echoed prompt)
    outputs = model.generate(**inputs, max_new_tokens=1000)
    new_tokens = outputs[:, inputs["input_ids"].shape[1]:]
    response = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
    return response
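# A minimal streaming variant of the same pipeline, sketched here as an optional
# helper (hypothetical, not wired into the UI below). It assumes the processor and
# model defined above, and uses transformers' TextIteratorStreamer with a background
# thread so partial text can be yielded to Gradio as tokens are generated.
@spaces.GPU()
def process_image_streaming(image, text_input=None):
    from threading import Thread
    from transformers import TextIteratorStreamer

    image = Image.fromarray(image).convert("RGB")
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": text_input or "Describe this image."},
        ],
    }]
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
    # skip_prompt=True keeps the echoed input tokens out of the stream
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1000))
    thread.start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial
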
# CSS: hide Gradio's built-in footer so only the custom HTML footer is shown
css = """
footer {
visibility: hidden;
}
"""
# Gradio interface
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
with gr.Row():
input_img = gr.Image(label="Input Image")
with gr.Row():
text_input = gr.Textbox(label="Question (Optional)")
with gr.Row():
submit_btn = gr.Button(value="Submit")
with gr.Row():
output_text = gr.Textbox(label="Response")
submit_btn.click(process_image, [input_img, text_input], [output_text])
gr.HTML(footer)
# Launch the app
demo.launch(debug=True)