"""Gradio chat demo served by the Qwen2-VL instruct model.

The module loads the model and its processor once at import time, exposes
``stream_chat`` as the chat callback, and wires it into a ``gr.ChatInterface``
with sliders for the generation parameters.
"""

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
)
import spaces
import gradio as gr
import os
from threading import Thread
from PIL import Image
from qwen_vl_utils import process_vision_info

# The model and the processor must come from the same checkpoint: the
# processor supplies the chat template and the vision preprocessing that the
# model weights were trained with.
model_name = "Qwen/Qwen2-VL-72B-Instruct-AWQ"

model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)


def _text_turn(role, text):
    """Return one chat message holding a single text part."""
    return {"role": role, "content": [{"type": "text", "text": text}]}


def _build_messages(message, history):
    """Convert the chat history plus the new user message into chat messages.

    ``history`` may be a list of ``(user, assistant)`` pairs or a list of
    ``{"role": ..., "content": ...}`` dicts. Only non-empty string contents
    are forwarded; any other entry (e.g. a pending ``None`` answer) is skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            content = turn.get("content")
            if isinstance(content, str) and content:
                messages.append(_text_turn(turn.get("role", "user"), content))
            continue
        prompt, answer = turn
        if isinstance(prompt, str) and prompt:
            messages.append(_text_turn("user", prompt))
        if isinstance(answer, str) and answer:
            messages.append(_text_turn("assistant", answer))
    messages.append(_text_turn("user", message))
    return messages


@spaces.GPU
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    """Generate the assistant reply for ``message`` given the prior ``history``.

    Args:
        message: The new user message.
        history: Previous turns of the conversation (see ``_build_messages``).
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass (used only when sampling).
        top_k: Top-k sampling cutoff (used only when sampling).
        penalty: Repetition penalty; ignored unless strictly positive.

    Returns:
        The decoded reply as a single string.
    """
    messages = _build_messages(message, history)

    # Preparation for inference
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")

    generate_kwargs = {"max_new_tokens": int(max_new_tokens)}
    if penalty > 0:
        # The slider allows 0.0, which is not a usable repetition penalty.
        generate_kwargs["repetition_penalty"] = penalty
    if temperature > 0:
        generate_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
        )
    else:
        # The slider allows 0, which is invalid for sampling: decode greedily.
        generate_kwargs["do_sample"] = False

    generated_ids = model.generate(**inputs, **generate_kwargs)

    # Drop the prompt tokens so only the newly generated reply is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single prompt was submitted, so the batch holds exactly one reply.
    return output_text[0]


chatbot = gr.Chatbot(height=450)

with gr.Blocks() as demo:
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        # Order must match the extra parameters of ``stream_chat``.
        additional_inputs=[
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.8,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=128,
                maximum=4096,
                step=1,
                value=1024,
                label="Max new tokens",
                render=False,
            ),
            gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.8,
                label="top_p",
                render=False,
            ),
            gr.Slider(
                minimum=1,
                maximum=20,
                step=1,
                value=20,
                label="top_k",
                render=False,
            ),
            gr.Slider(
                minimum=0.0,
                maximum=2.0,
                step=0.1,
                value=1.0,
                label="Repetition penalty",
                render=False,
            ),
        ],
        examples=[
            ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
            ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
            ["Tell me a random fun fact about the Roman Empire."],
            ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
        ],
        cache_examples=False,
    )


if __name__ == "__main__":
    demo.launch()