aiqtech committed on
Commit 723de5f · verified · 1 Parent(s): 97313a7

Update app.py

Files changed (1)
  1. app.py +35 -221
app.py CHANGED
@@ -2,249 +2,63 @@ import spaces
 import os
 import time
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig, AutoProcessor
+from transformers import AutoProcessor, AutoModelForImageTextToText
 import gradio as gr
 from threading import Thread
 from PIL import Image
-import subprocess
 
-# Install flash-attn if not already installed
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-# Define placeholder and footer
-PLACEHOLDER = "Send a message..."
+# Model and processor initialization
+processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
+model = AutoModelForImageTextToText.from_pretrained("Qwen/QVQ-72B-Preview").cuda().eval()
 
+# Footer
 footer = """
 <div style="text-align: center; margin-top: 20px;">
-    <p>Powered by Phi-3.5 Models</p>
+    <p>Powered by QVQ-72B Model</p>
 </div>
 """
 
-# Model and tokenizer for the chatbot
-MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"
-MODEL_LIST1 = ["microsoft/Phi-3.5-mini-instruct"]
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
-device = "cuda" if torch.cuda.is_available() else "cpu" # for GPU usage or "cpu" for CPU usage / But you need GPU :)
-
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4")
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID1,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    quantization_config=quantization_config)
-
-# Chatbot tab function
-@spaces.GPU()
-def stream_chat(
-    message: str,
-    history: list,
-    system_prompt: str,
-    temperature: float = 0.8,
-    max_new_tokens: int = 1024,
-    top_p: float = 1.0,
-    top_k: int = 20,
-    penalty: float = 1.2,
-):
-    print(f'message: {message}')
-    print(f'history: {history}')
-
-    conversation = [
-        {"role": "system", "content": system_prompt}
-    ]
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-
-    conversation.append({"role": "user", "content": message})
-
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
-
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-    generate_kwargs = dict(
-        input_ids=input_ids,
-        max_new_tokens = max_new_tokens,
-        do_sample = False if temperature == 0 else True,
-        top_p = top_p,
-        top_k = top_k,
-        temperature = temperature,
-        eos_token_id=[128001,128008,128009],
-        streamer=streamer,
-    )
-
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=generate_kwargs)
-        thread.start()
-
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
-
-# Vision model setup
-models = {
-    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
-}
-
-processors = {
-    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
-}
-
-user_prompt = '\n'
-assistant_prompt = '\n'
-prompt_suffix = "\n"
-
-# Vision model tab function
+# Vision model function
 @spaces.GPU()
-def stream_vision(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
-    model = models[model_id]
-    processor = processors[model_id]
-
-    # Prepare the image list and corresponding tags
-    images = [Image.fromarray(image).convert("RGB")]
-    placeholder = "<|image_1|>\n" # Using the image tag as per the example
-
-    # Construct the prompt with the image tag and the user's text input
+def process_image(image, text_input=None):
+    # Convert image to PIL format
+    image = Image.fromarray(image).convert("RGB")
+
+    # Prepare inputs
     if text_input:
-        prompt_content = placeholder + text_input
+        inputs = processor(text=text_input, images=image, return_tensors="pt").to("cuda:0")
     else:
-        prompt_content = placeholder
-
-    messages = [
-        {"role": "user", "content": prompt_content},
-    ]
-
-    # Apply the chat template to the messages
-    prompt = processor.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    # Process the inputs with the processor
-    inputs = processor(prompt, images, return_tensors="pt").to("cuda:0")
-
-    # Generation parameters
-    generation_args = {
-        "max_new_tokens": 1000,
-        "temperature": 0.0,
-        "do_sample": False,
-    }
-
-    # Generate the response
-    generate_ids = model.generate(
-        **inputs,
-        eos_token_id=processor.tokenizer.eos_token_id,
-        **generation_args
-    )
-
-    # Remove input tokens from the generated response
-    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
-
-    # Decode the generated output
-    response = processor.batch_decode(
-        generate_ids,
-        skip_special_tokens=True,
-        clean_up_tokenization_spaces=False
-    )[0]
-
+        inputs = processor(images=image, return_tensors="pt").to("cuda:0")
+
+    # Generate output
+    outputs = model.generate(**inputs, max_new_tokens=1000)
+
+    # Decode response
+    response = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+
     return response
 
+# CSS styling
 css = """
 footer {
     visibility: hidden;
}
 """
 
-# Gradio app with two tabs
+# Gradio interface
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
-
-    with gr.Tab("Chatbot"):
-        chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-        gr.ChatInterface(
-            fn=stream_chat,
-            chatbot=chatbot,
-            fill_height=True,
-            additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-            additional_inputs=[
-                gr.Textbox(
-                    value="You are a helpful assistant",
-                    label="System Prompt",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0,
-                    maximum=1,
-                    step=0.1,
-                    value=0.8,
-                    label="Temperature",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=128,
-                    maximum=8192,
-                    step=1,
-                    value=1024,
-                    label="Max new tokens",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.1,
-                    value=1.0,
-                    label="top_p",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=1,
-                    maximum=20,
-                    step=1,
-                    value=20,
-                    label="top_k",
-                    render=False,
-                ),
-                gr.Slider(
-                    minimum=0.0,
-                    maximum=2.0,
-                    step=0.1,
-                    value=1.2,
-                    label="Repetition penalty",
-                    render=False,
-                ),
-            ],
-            examples=[
-                ["How to make a self-driving car?"],
-                ["Give me a creative idea to establish a startup"],
-                ["How can I improve my programming skills?"],
-                ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
-            ],
-            cache_examples=False,
-        )
-    with gr.Tab("Vision"):
-        with gr.Row():
-            input_img = gr.Image(label="Input Picture")
-        with gr.Row():
-            model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="microsoft/Phi-3.5-vision-instruct")
-        with gr.Row():
-            text_input = gr.Textbox(label="Question")
-        with gr.Row():
-            submit_btn = gr.Button(value="Submit")
-        with gr.Row():
-            output_text = gr.Textbox(label="Output Text")
-
-        submit_btn.click(stream_vision, [input_img, text_input, model_selector], [output_text])
-
+    with gr.Row():
+        input_img = gr.Image(label="Input Image")
+    with gr.Row():
+        text_input = gr.Textbox(label="Question (Optional)")
+    with gr.Row():
+        submit_btn = gr.Button(value="Submit")
+    with gr.Row():
+        output_text = gr.Textbox(label="Response")
+
+    submit_btn.click(process_image, [input_img, text_input], [output_text])
+
     gr.HTML(footer)
 
-# Launch the combined app
+# Launch the app
 demo.launch(debug=True)
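
For reference, below is a minimal sketch of how the QVQ-72B-Preview pipeline introduced in this commit is typically driven through the processor's chat template rather than raw text/image inputs. It assumes the Qwen2-VL-style processor API that ships with the model; the local image path, the prompt text, and the torch_dtype/device_map loading options are illustrative assumptions, not part of the committed app.py.

# Sketch only: assumes QVQ-72B-Preview's Qwen2-VL-style chat template and sufficient GPU memory.
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
model = AutoModelForImageTextToText.from_pretrained(
    "Qwen/QVQ-72B-Preview",
    torch_dtype="auto",   # illustrative; the commit loads with defaults and .cuda()
    device_map="auto",
).eval()

image = Image.open("example.jpg").convert("RGB")  # hypothetical local file
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is shown in this picture?"},
        ],
    }
]

# Render the chat template, then batch the prompt and image together.
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)

output_ids = model.generate(**inputs, max_new_tokens=512)
# Drop the prompt tokens before decoding, as the previous Phi-3.5 code did.
output_ids = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])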