"""Gradio demo: ask the Qwen/QVQ-72B-Preview vision-language model about an image.

The script loads the model once at import time, exposes a single
``process_image`` handler, and wires it to a small Gradio Blocks UI.
"""

import os
import subprocess
import sys
import time
from threading import Thread

# NOTE(review): `spaces` is kept ahead of `torch`, matching the original import
# order; presumably the Spaces GPU runtime wants to be imported first -- confirm.
import spaces
import gradio as gr
import torch
from PIL import Image

# Best-effort runtime installs. An argument list with the running interpreter's
# own pip avoids shell parsing and guarantees the packages land in this
# environment. Failures are tolerated (check=False), as in the original script.
for _pip_args in (["--upgrade", "transformers"], ["accelerate"]):
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_pip_args],
        check=False,
    )

# Imported only after the upgrade above so the refreshed package is the one loaded.
from transformers import (  # noqa: E402
    AutoConfig,
    AutoProcessor,
    AutoTokenizer,
    PreTrainedModel,
    Qwen2VLForConditionalGeneration,
)

# Model and tokenizer initialization
model_name = "Qwen/QVQ-72B-Preview"

# Prompt used when the user leaves the question box empty.
DEFAULT_PROMPT = "Please describe this image in detail."

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 2048

config = AutoConfig.from_pretrained(
    model_name,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# The processor bundles the tokenizer with the image preprocessor and owns the
# chat template that inserts the image placeholder tokens.
processor = AutoProcessor.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# `PreTrainedModel` is only the abstract base class and cannot load this
# checkpoint; QVQ-72B-Preview uses the Qwen2-VL architecture, so the concrete
# conditional-generation class is required.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    config=config,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Footer
footer = """

Powered by QVQ-72B Model

"""


# Vision model function
@spaces.GPU()
def process_image(image, text_input=None):
    """Answer a question about an image, or describe it if no question is given.

    Args:
        image: The uploaded picture as a NumPy array (the UI below configures
            ``gr.Image`` with ``type="numpy"``), or ``None`` when nothing was
            uploaded.
        text_input: Optional question about the image. When empty or
            whitespace-only, ``DEFAULT_PROMPT`` is used instead.

    Returns:
        The model's answer as a string. Failures are never raised to the UI;
        they are returned as a string starting with ``"Error processing image: "``.
    """
    if image is None:
        # Give a clear message instead of the cryptic failure that
        # Image.fromarray(None) would otherwise produce.
        return "Error processing image: no image was provided."

    try:
        # Convert image to PIL format
        pil_image = Image.fromarray(image).convert("RGB")

        # Prepare inputs: one user turn holding an image slot plus the prompt.
        prompt = (text_input or "").strip() or DEFAULT_PROMPT
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": prompt},
                ],
            }
        ]
        chat_text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = processor(
            text=[chat_text],
            images=[pil_image],
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        # Generate response
        generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

        # generate() returns prompt + completion; keep only the new tokens so
        # the prompt is not echoed back to the user.
        prompt_length = inputs.input_ids.shape[1]
        completion_ids = generated_ids[:, prompt_length:]
        decoded = processor.batch_decode(
            completion_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]
    except Exception as e:
        # Top-level UI boundary: report the problem in the output box rather
        # than crashing the request.
        return f"Error processing image: {str(e)}"


# CSS styling
css = """
footer {
    visibility: hidden;
}
"""

# Gradio interface
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
    with gr.Row():
        # type="numpy" is stated explicitly because process_image relies on
        # receiving an array it can hand to Image.fromarray.
        input_img = gr.Image(label="Input Image", type="numpy")
    with gr.Row():
        text_input = gr.Textbox(label="Question (Optional)")
    with gr.Row():
        submit_btn = gr.Button(value="Submit")
    with gr.Row():
        output_text = gr.Textbox(label="Response")

    submit_btn.click(process_image, [input_img, text_input], [output_text])
    gr.HTML(footer)

# Launch the app
demo.launch(debug=True)