aiqtech committed on
Commit 723de5f · verified · 1 Parent(s): 97313a7

Update app.py

Files changed (1)
  1. app.py +35 -221
app.py CHANGED
@@ -2,249 +2,63 @@ import spaces
 import os
 import time
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig, AutoProcessor
+from transformers import AutoProcessor, AutoModelForImageTextToText
 import gradio as gr
 from threading import Thread
 from PIL import Image
-import subprocess
 
-# Install flash-attn if not already installed
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-# Define placeholder and footer
-PLACEHOLDER = "Send a message..."
+# Model and processor initialization
+processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
+model = AutoModelForImageTextToText.from_pretrained("Qwen/QVQ-72B-Preview").cuda().eval()
 
+# Footer
 footer = """
 <div style="text-align: center; margin-top: 20px;">
-    <p>Powered by Phi-3.5 Models</p>
+    <p>Powered by QVQ-72B Model</p>
 </div>
 """
 
-# Model and tokenizer for the chatbot
-MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"
-MODEL_LIST1 = ["microsoft/Phi-3.5-mini-instruct"]
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
-device = "cuda" if torch.cuda.is_available() else "cpu" # for GPU usage or "cpu" for CPU usage / But you need GPU :)
-
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4")
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID1,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    quantization_config=quantization_config)
-
-# Chatbot tab function
-@spaces.GPU()
-def stream_chat(
-    message: str,
-    history: list,
-    system_prompt: str,
-    temperature: float = 0.8,
-    max_new_tokens: int = 1024,
-    top_p: float = 1.0,
-    top_k: int = 20,
-    penalty: float = 1.2,
-):
-    print(f'message: {message}')
-    print(f'history: {history}')
-
-    conversation = [
-        {"role": "system", "content": system_prompt}
-    ]
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-
-    conversation.append({"role": "user", "content": message})
-
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
-
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-    generate_kwargs = dict(
-        input_ids=input_ids,
-        max_new_tokens = max_new_tokens,
-        do_sample = False if temperature == 0 else True,
-        top_p = top_p,
-        top_k = top_k,
-        temperature = temperature,
-        eos_token_id=[128001,128008,128009],
-        streamer=streamer,
-    )
-
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=generate_kwargs)
-        thread.start()
-
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
-
-# Vision model setup
-models = {
-    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
-}
-
-processors = {
-    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
-}
-
-user_prompt = '\n'
-assistant_prompt = '\n'
-prompt_suffix = "\n"
-
-# Vision model tab function
+# Vision model function
 @spaces.GPU()
-def stream_vision(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
-    model = models[model_id]
-    processor = processors[model_id]
-
-    # Prepare the image list and corresponding tags
-    images = [Image.fromarray(image).convert("RGB")]
-    placeholder = "<|image_1|>\n" # Using the image tag as per the example
-
-    # Construct the prompt with the image tag and the user's text input
+def process_image(image, text_input=None):
+    # Convert image to PIL format
+    image = Image.fromarray(image).convert("RGB")
+
+    # Prepare inputs
     if text_input:
-        prompt_content = placeholder + text_input
+        inputs = processor(text=text_input, images=image, return_tensors="pt").to("cuda:0")
     else:
-        prompt_content = placeholder
-
-    messages = [
-        {"role": "user", "content": prompt_content},
-    ]
-
-    # Apply the chat template to the messages
-    prompt = processor.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    # Process the inputs with the processor
-    inputs = processor(prompt, images, return_tensors="pt").to("cuda:0")
-
-    # Generation parameters
-    generation_args = {
-        "max_new_tokens": 1000,
-        "temperature": 0.0,
-        "do_sample": False,
-    }
-
-    # Generate the response
-    generate_ids = model.generate(
-        **inputs,
-        eos_token_id=processor.tokenizer.eos_token_id,
-        **generation_args
-    )
-
-    # Remove input tokens from the generated response
-    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
-
-    # Decode the generated output
-    response = processor.batch_decode(
-        generate_ids,
-        skip_special_tokens=True,
-        clean_up_tokenization_spaces=False
-    )[0]
-
+        inputs = processor(images=image, return_tensors="pt").to("cuda:0")
+
+    # Generate output
+    outputs = model.generate(**inputs, max_new_tokens=1000)
+
+    # Decode response
+    response = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+
     return response
 
+# CSS styling
 css = """
 footer {
     visibility: hidden;
}
 """
 
-# Gradio app with two tabs
+# Gradio interface
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
-
-    with gr.Tab("Chatbot"):
-        chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-        gr.ChatInterface(
-            fn=stream_chat,
-            chatbot=chatbot,
-            fill_height=True,
-            additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-            additional_inputs=[
-                gr.Textbox(
-                    value="You are a helpful assistant",
-                    label="System Prompt",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0,
-                    maximum=1,
-                    step=0.1,
-                    value=0.8,
-                    label="Temperature",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=128,
-                    maximum=8192,
-                    step=1,
-                    value=1024,
-                    label="Max new tokens",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.1,
-                    value=1.0,
-                    label="top_p",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=1,
-                    maximum=20,
-                    step=1,
-                    value=20,
-                    label="top_k",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0.0,
-                    maximum=2.0,
-                    step=0.1,
-                    value=1.2,
-                    label="Repetition penalty",
-                    render=False,
-                ),
-            ],
-            examples=[
-                ["How to make a self-driving car?"],
-                ["Give me a creative idea to establish a startup"],
-                ["How can I improve my programming skills?"],
-                ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
-            ],
-            cache_examples=False,
-        )
-    with gr.Tab("Vision"):
-        with gr.Row():
-            input_img = gr.Image(label="Input Picture")
-        with gr.Row():
-            model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="microsoft/Phi-3.5-vision-instruct")
-        with gr.Row():
-            text_input = gr.Textbox(label="Question")
-        with gr.Row():
-            submit_btn = gr.Button(value="Submit")
-        with gr.Row():
-            output_text = gr.Textbox(label="Output Text")
-
-        submit_btn.click(stream_vision, [input_img, text_input, model_selector], [output_text])
-
+    with gr.Row():
+        input_img = gr.Image(label="Input Image")
+    with gr.Row():
+        text_input = gr.Textbox(label="Question (Optional)")
+    with gr.Row():
+        submit_btn = gr.Button(value="Submit")
+    with gr.Row():
+        output_text = gr.Textbox(label="Response")
+
+    submit_btn.click(process_image, [input_img, text_input], [output_text])
+
     gr.HTML(footer)
 
-# Launch the combined app
+# Launch the app
 demo.launch(debug=True)
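
For reference, below is a minimal sketch of how the QVQ-72B-Preview pipeline introduced in this commit is typically driven through the processor's chat template rather than raw text/image inputs. It assumes the Qwen2-VL-style processor API that ships with the model; the local image path, the prompt text, and the torch_dtype/device_map loading options are illustrative assumptions, not part of the committed app.py.

# Sketch only: assumes QVQ-72B-Preview's Qwen2-VL-style chat template and sufficient GPU memory.
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
model = AutoModelForImageTextToText.from_pretrained(
    "Qwen/QVQ-72B-Preview",
    torch_dtype="auto",   # illustrative; the commit loads with defaults and .cuda()
    device_map="auto",
).eval()

image = Image.open("example.jpg").convert("RGB")  # hypothetical local file
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is shown in this picture?"},
        ],
    }
]

# Render the chat template, then batch the prompt and image together.
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)

output_ids = model.generate(**inputs, max_new_tokens=512)
# Drop the prompt tokens before decoding, as the previous Phi-3.5 code did.
output_ids = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])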