zyliu committed on
Commit
69cc02c
1 Parent(s): 01657a2

update gradio_web_server.py and app.py

Browse files
Files changed (2) hide show
  1. app.py +841 -109
  2. gradio_web_server.py +2 -2
app.py CHANGED
@@ -1,116 +1,848 @@
1
- import fire
2
- import subprocess
 
 
3
  import os
4
  import time
5
- import signal
6
- import subprocess
7
- import atexit
8
-
9
-
10
- def kill_processes_by_cmd_substring(cmd_substring):
11
- # execute `ps -ef` and obtain its output
12
- result = subprocess.run(["ps", "-ef"], stdout=subprocess.PIPE, text=True)
13
- lines = result.stdout.splitlines()
14
-
15
- # visit each line
16
- for line in lines:
17
- if cmd_substring in line:
18
- # extract PID
19
- parts = line.split()
20
- pid = int(parts[1])
21
- print(f"Killing process with PID: {pid}, CMD: {line}")
22
- os.kill(pid, signal.SIGTERM)
23
-
24
-
25
- def main(
26
- python_path="python",
27
- run_controller=True,
28
- run_worker=True,
29
- run_gradio=True,
30
- controller_port=10086,
31
- gradio_port=7860,
32
- worker_names=[
33
- "OpenGVLab/InternVL2-8B",
34
- ],
35
- run_sd_worker=False,
36
- **kwargs,
37
- ):
38
- host = "http://0.0.0.0"
39
- controller_process = None
40
- if run_controller:
41
- # python controller.py --host 0.0.0.0 --port 10086
42
- cmd_args = [
43
- f"{python_path}",
44
- "controller.py",
45
- "--host",
46
- "0.0.0.0",
47
- "--port",
48
- f"{controller_port}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  ]
50
- kill_processes_by_cmd_substring(" ".join(cmd_args))
51
- print("Launching controller: ", " ".join(cmd_args))
52
- controller_process = subprocess.Popen(cmd_args)
53
- atexit.register(controller_process.terminate)
54
-
55
- worker_processes = []
56
- if run_worker:
57
- worker_port = 10088
58
- for worker_name in worker_names:
59
- cmd_args = [
60
- f"{python_path}",
61
- "model_worker.py",
62
- "--port",
63
- f"{worker_port}",
64
- "--controller-url",
65
- f"{host}:{controller_port}",
66
- "--model-path",
67
- f"{worker_name}",
68
- "--load-8bit",
69
- ]
70
- kill_processes_by_cmd_substring(" ".join(cmd_args))
71
- print("Launching worker: ", " ".join(cmd_args))
72
- worker_process = subprocess.Popen(cmd_args)
73
- worker_processes.append(worker_process)
74
- atexit.register(worker_process.terminate)
75
- worker_port += 1
76
-
77
- time.sleep(60)
78
- gradio_process = None
79
- if run_gradio:
80
- # python gradio_web_server.py --port 10088 --controller-url http://0.0.0.0:10086
81
- cmd_args = [
82
- f"{python_path}",
83
- "gradio_web_server.py",
84
- "--port",
85
- f"{gradio_port}",
86
- "--controller-url",
87
- f"{host}:{controller_port}",
88
- "--model-list-mode",
89
- "reload",
90
  ]
91
- kill_processes_by_cmd_substring(" ".join(cmd_args))
92
- print("Launching gradio: ", " ".join(cmd_args))
93
- gradio_process = subprocess.Popen(cmd_args)
94
- atexit.register(gradio_process.terminate)
95
-
96
- sd_worker_process = None
97
- if run_sd_worker:
98
- # python model_worker.py --port 10088 --controller-address http://
99
- cmd_args = [f"{python_path}", "sd_worker.py"]
100
- kill_processes_by_cmd_substring(" ".join(cmd_args))
101
- print("Launching sd_worker: ", " ".join(cmd_args))
102
- sd_worker_process = subprocess.Popen(cmd_args)
103
- atexit.register(sd_worker_process.terminate)
104
-
105
- for worker_process in worker_processes:
106
- worker_process.wait()
107
- if controller_process:
108
- controller_process.wait()
109
- if gradio_process:
110
- gradio_process.wait()
111
- if sd_worker_process:
112
- sd_worker_process.wait()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  if __name__ == "__main__":
116
- fire.Fire(main)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from ast import parse
3
+ import datetime
4
+ import json
5
  import os
6
  import time
7
+ import hashlib
8
+ import re
9
+
10
+ import gradio as gr
11
+ import requests
12
+ import random
13
+ from filelock import FileLock
14
+ from io import BytesIO
15
+ from PIL import Image, ImageDraw, ImageFont
16
+
17
+ from constants import LOGDIR
18
+ from utils import (
19
+ build_logger,
20
+ server_error_msg,
21
+ violates_moderation,
22
+ moderation_msg,
23
+ load_image_from_base64,
24
+ get_log_filename,
25
+ )
26
+ from conversation import Conversation
27
+
28
+ logger = build_logger("gradio_web_server", "gradio_web_server.log")
29
+
30
+ headers = {"User-Agent": "InternVL-Chat Client"}
31
+
32
+ no_change_btn = gr.Button()
33
+ enable_btn = gr.Button(interactive=True)
34
+ disable_btn = gr.Button(interactive=False)
35
+
36
+
37
def write2file(path, content):
    """Append ``content`` to the file at ``path`` while holding a file lock.

    The lock lives in a sidecar file named ``<path>.lock`` and is acquired
    before the target file is opened in append mode.
    """
    with FileLock(f"{path}.lock"), open(path, "a") as log_file:
        log_file.write(content)
42
+
43
+
44
def sort_models(models):
    """Sort ``models`` in place so InternVL-Chat-V1-5 variants come first.

    Models named ``InternVL-Chat-V1-5`` or starting with
    ``InternVL-Chat-V1-5-`` are "priority" models. The list is ordered with
    priority models first and names in descending order inside each group.
    Up to four of the leading priority models are then shuffled so that the
    default (first) entry varies between calls.

    Only priority models take part in the shuffle: when fewer than four are
    present, an ordinary model can no longer be shuffled into first place.

    Args:
        models: Mutable list of model-name strings.

    Returns:
        The same list object, reordered.
    """
    priority_name = "InternVL-Chat-V1-5"
    max_shuffled = 4  # size of the leading window that gets randomised

    def is_priority(model_name):
        return model_name == priority_name or model_name.startswith(
            priority_name + "-"
        )

    # True sorts after False, so reverse=True puts the priority group first.
    models.sort(key=lambda name: (is_priority(name), name), reverse=True)

    priority_count = sum(1 for name in models if is_priority(name))
    window = min(max_shuffled, priority_count)
    head = models[:window]
    random.shuffle(head)
    models[:window] = head
    return models
62
+
63
+
64
def get_model_list():
    """Ask the controller to refresh its workers and return the model names.

    Posts to ``/refresh_all_workers`` and then ``/list_models`` on
    ``args.controller_url``, and returns the ``"models"`` entry of the second
    response after passing it through ``sort_models``.

    Raises:
        RuntimeError: If either controller endpoint answers with a status
            other than 200. (An explicit raise is used instead of ``assert``
            so the check survives ``python -O``.)
    """
    logger.info("Call `get_model_list`")
    refresh = requests.post(args.controller_url + "/refresh_all_workers")
    logger.info(f"status_code from `get_model_list`: {refresh.status_code}")
    if refresh.status_code != 200:
        raise RuntimeError(
            f"refresh_all_workers failed with status {refresh.status_code}"
        )

    listing = requests.post(args.controller_url + "/list_models")
    logger.info(f"status_code from `list_models`: {listing.status_code}")
    if listing.status_code != 200:
        raise RuntimeError(f"list_models failed with status {listing.status_code}")

    models = sort_models(listing.json()["models"])
    logger.info(f"Models (from {args.controller_url}): {models}")
    return models
76
+
77
+
78
+ get_window_url_params = """
79
+ function() {
80
+ const params = new URLSearchParams(window.location.search);
81
+ url_params = Object.fromEntries(params);
82
+ console.log(url_params);
83
+ return url_params;
84
+ }
85
+ """
86
+
87
+
88
def init_state(state=None):
    """Return a brand-new ``Conversation``.

    ``state`` is accepted so callers can hand over the conversation being
    replaced; its contents are never read.
    """
    return Conversation()
92
+
93
+
94
def find_bounding_boxes(state, response):
    """Draw the boxes referenced in ``response`` onto the latest user image.

    ``response`` is scanned for ``<ref>label</ref><box>[[x0, y0, x1, y1], ...]</box>``
    groups. Box coordinates are divided by 1000 and scaled by the image
    width/height, then each box is outlined and labelled on a copy of the
    most recent user image.

    Args:
        state: Conversation object providing ``get_images(source=...)`` and
            a ``USER`` attribute.
        response: Model output text to scan.

    Returns:
        The annotated image copy, or ``None`` when ``response`` contains no
        ``<ref>``/``<box>`` group or the conversation has no user image.
    """
    # Local import: the module header only pulls `parse` from ast.
    import ast

    pattern = re.compile(r"<ref>\s*(.*?)\s*</ref>\s*<box>\s*(\[\[.*?\]\])\s*</box>")
    matches = pattern.findall(response)
    if not matches:
        # Nothing to draw: return before touching the conversation images.
        return None

    user_images = state.get_images(source=state.USER)
    if not user_images:
        return None

    returned_image = user_images[-1].copy()
    width, height = returned_image.size
    draw = ImageDraw.Draw(returned_image)
    # Both the stroke width and the font size depend only on the image size.
    line_width = max(1, int(min(width, height) / 200))
    font = None

    for category_name, raw_boxes in matches:
        try:
            # The text comes from the model, so parse it as a literal instead
            # of evaluating it as arbitrary Python.
            boxes = [
                (
                    int(float(box[0]) / 1000 * width),
                    int(float(box[1]) / 1000 * height),
                    int(float(box[2]) / 1000 * width),
                    int(float(box[3]) / 1000 * height),
                )
                for box in ast.literal_eval(raw_boxes)
            ]
        except (ValueError, SyntaxError, TypeError, IndexError):
            logger.warning(f"Skipping malformed box for {category_name!r}: {raw_boxes}")
            continue

        random_color = (
            random.randint(0, 128),
            random.randint(0, 128),
            random.randint(0, 128),
        )
        for box in boxes:
            draw.rectangle(box, outline=random_color, width=line_width)
            if font is None:
                # Loaded once, and only when there is something to label.
                font = ImageFont.truetype(
                    "assets/SimHei.ttf", int(20 * line_width / 2)
                )
            text_size = font.getbbox(category_name)
            text_width = text_size[2] - text_size[0]
            text_height = text_size[3] - text_size[1]
            # Put the label just above the box, clamped to the image top.
            text_position = (box[0], max(0, box[1] - text_height))
            draw.rectangle(
                [
                    text_position,
                    (text_position[0] + text_width, text_position[1] + text_height),
                ],
                fill=random_color,
            )
            draw.text(text_position, category_name, fill="white", font=font)
    return returned_image
149
+
150
+
151
def query_image_generation(response, sd_worker_url, timeout=15):
    """Request an image for the drawing instruction embedded in ``response``.

    Looks for a fenced ``drawing-instruction`` block in ``response`` and, if
    one is found, posts its content as ``{"caption": ...}`` to
    ``<sd_worker_url>/generate_image/``.

    Returns:
        The image opened from the reply body, or ``None`` when
        ``sd_worker_url`` is falsy or no drawing instruction is present.

    Raises:
        requests.HTTPError: If the worker replies with an error status.
    """
    if not sd_worker_url:
        return None

    endpoint = f"{sd_worker_url}/generate_image/"
    found = re.search(r"```drawing-instruction\n(.*?)\n```", response, re.DOTALL)
    if found is None:
        return None

    payload = {"caption": found.group(1)}
    print("drawing-instruction:", payload)
    reply = requests.post(endpoint, json=payload, timeout=timeout)
    reply.raise_for_status()  # fail loudly if the HTTP request did not succeed
    return Image.open(BytesIO(reply.content))
166
+
167
+
168
def load_demo(url_params, request: gr.Request = None):
    """Create a fresh conversation and pre-select the model named in the URL.

    Args:
        url_params: Mapping of query-string parameters; a ``"model"`` entry
            selects that model when it is in the module-level ``models`` list.
        request: Optional Gradio request, used only to log the client IP.

    Returns:
        ``(state, dropdown_update)`` — a new conversation and the update for
        the model dropdown.
    """
    # Log only when a request is available; dereferencing it when it is
    # missing would raise AttributeError.
    if request:
        logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")

    dropdown_update = gr.Dropdown(visible=True)
    if "model" in url_params:
        model = url_params["model"]
        if model in models:
            dropdown_update = gr.Dropdown(value=model, visible=True)

    state = init_state()
    return state, dropdown_update
180
+
181
+
182
def load_demo_refresh_model_list(request: gr.Request = None):
    """Create a fresh conversation and reload the model list from the controller.

    Args:
        request: Optional Gradio request, used only to log the client IP.

    Returns:
        ``(state, dropdown_update)`` — a new conversation and a dropdown
        update whose choices are the refreshed models; the first model is
        selected, or ``""`` when the list is empty.
    """
    # Log only when a request is available; dereferencing it when it is
    # missing would raise AttributeError.
    if request:
        logger.info(f"load_demo. ip: {request.client.host}")
    models = get_model_list()
    state = init_state()
    dropdown_update = gr.Dropdown(
        choices=models, value=models[0] if len(models) > 0 else ""
    )
    return state, dropdown_update
191
+
192
+
193
def vote_last_response(state, liked, model_selector, request: gr.Request):
    """Append one feedback record for the current conversation to the log file.

    The record holds a timestamp, the ``liked`` value, the selected model,
    the serialised conversation and the client IP, written as one JSON line.
    """
    record = dict(
        tstamp=round(time.time(), 4),
        like=liked,
        model=model_selector,
        state=state.dict(),
        ip=request.client.host,
    )
    line = json.dumps(record) + "\n"
    write2file(get_log_filename(), line)
202
+
203
+
204
def upvote_last_response(state, model_selector, request: gr.Request):
    """Log an upvote and return a cleared textbox plus three disabled buttons."""
    logger.info(f"upvote. ip: {request.client.host}")
    vote_last_response(state, True, model_selector, request)
    cleared_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (cleared_box, disable_btn, disable_btn, disable_btn)
209
+
210
+
211
def downvote_last_response(state, model_selector, request: gr.Request):
    """Log a downvote and return a cleared textbox plus three disabled buttons."""
    logger.info(f"downvote. ip: {request.client.host}")
    vote_last_response(state, False, model_selector, request)
    cleared_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (cleared_box, disable_btn, disable_btn, disable_btn)
216
+
217
+
218
def vote_selected_response(
    state, model_selector, request: gr.Request, data: gr.LikeData
):
    """Log a like/dislike placed on one specific chatbot message.

    The record mirrors the one written by ``vote_last_response`` with the
    additional ``index`` of the voted message. Returns ``None``.
    """
    logger.info(
        f"Vote: {data.liked}, index: {data.index}, value: {data.value} , ip: {request.client.host}"
    )
    record = dict(
        tstamp=round(time.time(), 4),
        like=data.liked,
        index=data.index,
        model=model_selector,
        state=state.dict(),
        ip=request.client.host,
    )
    line = json.dumps(record) + "\n"
    write2file(get_log_filename(), line)
234
+
235
+
236
def flag_last_response(state, model_selector, request: gr.Request):
    """Log a ``"flag"`` vote and return a cleared textbox plus three disabled buttons."""
    logger.info(f"flag. ip: {request.client.host}")
    vote_last_response(state, "flag", model_selector, request)
    cleared_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (cleared_box, disable_btn, disable_btn, disable_btn)
241
+
242
+
243
def regenerate(state, image_process_mode, request: gr.Request):
    """Reset the last assistant message so the next step can generate it again.

    The assistant message at index -1 is set to ``None``. If the payload of
    the message at index -2 is a tuple or list, its third element is replaced
    by ``image_process_mode``.

    Returns:
        ``(state, chatbot, textbox)`` followed by five disabled buttons.
    """
    logger.info(f"regenerate. ip: {request.client.host}")
    state.update_message(Conversation.ASSISTANT, None, -1)

    previous_turn = state.messages[-2]
    payload = previous_turn[1]
    if type(payload) in (tuple, list):
        previous_turn[1] = (*payload[:2], image_process_mode)

    state.skip_next = False
    fresh_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (state, state.to_gradio_chatbot(), fresh_box) + (disable_btn,) * 5
253
+
254
+
255
def clear_history(request: gr.Request):
    """Start over with a new conversation.

    Returns:
        ``(state, chatbot, textbox)`` for the new conversation followed by
        five disabled buttons.
    """
    logger.info(f"clear_history. ip: {request.client.host}")
    new_state = init_state()
    fresh_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (new_state, new_state.to_gradio_chatbot(), fresh_box) + (disable_btn,) * 5
260
+
261
+
262
def change_system_prompt(state, system_prompt, request: gr.Request):
    """Store ``system_prompt`` on the conversation and return the conversation."""
    client_ip = request.client.host
    logger.info(f"Change system prompt. ip: {client_ip}")
    state.set_system_message(system_prompt)
    return state
266
+
267
+
268
def add_text(state, message, system_prompt, model_selector, request: gr.Request):
    """Append the user's multimodal message to the conversation.

    Args:
        state: Current conversation, or a falsy value when none exists yet
            (a new one is then created and the model list is refreshed).
        message: Mapping with optional ``"files"`` (image paths) and
            ``"text"`` entries.
        system_prompt: System prompt applied to the conversation.
        model_selector: Currently selected model, passed through unchanged
            unless the model list had to be refreshed.
        request: Gradio request, used for logging the client IP.

    Returns:
        A 9-tuple ``(state, chatbot, textbox, model_selector, *five_buttons)``.
        Every return path has this shape, matching the nine outputs the
        submit listeners are registered with.
    """
    print(f"state: {state}")
    if not state:
        state, model_selector = load_demo_refresh_model_list(request)
    images = message.get("files", [])
    text = message.get("text", "").strip()
    logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
    textbox = gr.MultimodalTextbox(value=None, interactive=False)

    # Nothing to send: tell the follow-up generation step to skip this turn.
    if not text and not images:
        state.skip_next = True
        return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
            no_change_btn,
        ) * 5

    if args.moderate and violates_moderation(text):
        state.skip_next = True
        textbox = gr.MultimodalTextbox(
            value={"text": moderation_msg}, interactive=True
        )
        return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
            no_change_btn,
        ) * 5

    images = [Image.open(path).convert("RGB") for path in images]

    # New images on top of a conversation that already has user images
    # start a fresh conversation.
    if len(images) > 0 and len(state.get_images(source=state.USER)) > 0:
        state = init_state(state)
    state.set_system_message(system_prompt)
    state.append_message(Conversation.USER, text, images)
    state.skip_next = False
    return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
        disable_btn,
    ) * 5
298
+
299
+
300
def http_bot(
    state,
    model_selector,
    temperature,
    top_p,
    repetition_penalty,
    max_new_tokens,
    max_input_tiles,
    request: gr.Request,
):
    """Stream the selected model's reply into the conversation.

    This is a generator: every ``yield`` is an 8-tuple
    ``(state, chatbot, textbox, *five_buttons)``.

    Steps:
        1. Skip immediately when ``state.skip_next`` is set.
        2. Ask the controller for the worker address of ``model_selector``.
        3. Post the prompt to ``<worker>/worker_generate_stream`` and update
           the last assistant message with each streamed chunk.
        4. Post-process the final reply (bounding boxes, image generation),
           close the turn and append a JSON record to the log file.

    Worker-reported errors and request failures are shown in the chat as the
    assistant message and end the generator early.
    """
    logger.info(f"http_bot. ip: {request.client.host}")
    start_tstamp = time.time()
    model_name = model_selector
    if hasattr(state, "skip_next") and state.skip_next:
        # This generate call is skipped due to invalid inputs.
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=False),
        ) + (no_change_btn,) * 5
        return

    # Query worker address.
    controller_url = args.controller_url
    ret = requests.post(
        controller_url + "/get_worker_address", json={"model": model_name}
    )
    worker_addr = ret.json()["address"]
    logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")

    # No available worker.
    if worker_addr == "":
        state.update_message(Conversation.ASSISTANT, server_error_msg)
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=False),
            disable_btn,
            disable_btn,
            disable_btn,
            enable_btn,
            enable_btn,
        )
        return

    all_images = state.get_images(source=state.USER)
    all_image_paths = [state.save_image(image) for image in all_images]

    # Build the request. The "images" entry exists only for the log line
    # below and is removed before the payload is sent.
    pload = {
        "model": model_name,
        "prompt": state.get_prompt(),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_new_tokens": max_new_tokens,
        "max_input_tiles": max_input_tiles,
        "repetition_penalty": repetition_penalty,
        "images": f"List of {len(all_images)} images: {all_image_paths}",
    }
    logger.info(f"==== request ====\n{pload}")
    pload.pop("images")
    # NOTE(review): the keyword is spelled `inlude_image` at this call site;
    # confirm it matches the parameter name of Conversation.get_prompt.
    pload["prompt"] = state.get_prompt(inlude_image=True)
    state.append_message(Conversation.ASSISTANT, state.streaming_placeholder)
    yield (
        state,
        state.to_gradio_chatbot(),
        gr.MultimodalTextbox(interactive=False),
    ) + (disable_btn,) * 5

    # Defined up front so it is always bound, even when the stream carries no
    # "text" field (or no chunk at all).
    output = state.streaming_placeholder
    try:
        # Stream output.
        response = requests.post(
            worker_addr + "/worker_generate_stream",
            headers=headers,
            json=pload,
            stream=True,
            timeout=20,
        )
        for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
            if not chunk:
                continue
            data = json.loads(chunk.decode())
            if data["error_code"] != 0:
                output = f"**{data['text']}**" + f" (error_code: {data['error_code']})"
                state.update_message(Conversation.ASSISTANT, output, None)
                yield (
                    state,
                    state.to_gradio_chatbot(),
                    gr.MultimodalTextbox(interactive=True),
                ) + (
                    disable_btn,
                    disable_btn,
                    disable_btn,
                    enable_btn,
                    enable_btn,
                )
                return

            if "text" in data:
                output = data["text"].strip()
                output += state.streaming_placeholder

            image = None
            if "image" in data:
                image = load_image_from_base64(data["image"])
                _ = state.save_image(image)

            state.update_message(Conversation.ASSISTANT, output, image)
            yield (
                state,
                state.to_gradio_chatbot(),
                gr.MultimodalTextbox(interactive=False),
            ) + (disable_btn,) * 5
    except requests.exceptions.RequestException as e:
        logger.error(f"worker request failed: {e}")
        state.update_message(Conversation.ASSISTANT, server_error_msg, None)
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=True),
        ) + (
            disable_btn,
            disable_btn,
            disable_btn,
            enable_btn,
            enable_btn,
        )
        return

    ai_response = state.return_last_message()
    if "<ref>" in ai_response:
        returned_image = find_bounding_boxes(state, ai_response)
        returned_image = [returned_image] if returned_image else []
        state.update_message(Conversation.ASSISTANT, ai_response, returned_image)
    if "```drawing-instruction" in ai_response:
        returned_image = query_image_generation(
            ai_response, sd_worker_url=sd_worker_url
        )
        returned_image = [returned_image] if returned_image else []
        state.update_message(Conversation.ASSISTANT, ai_response, returned_image)

    state.end_of_current_turn()

    yield (
        state,
        state.to_gradio_chatbot(),
        gr.MultimodalTextbox(interactive=True),
    ) + (enable_btn,) * 5

    finish_tstamp = time.time()
    logger.info(f"{output}")
    log_record = {
        "tstamp": round(finish_tstamp, 4),
        "like": None,
        "model": model_name,
        "start": round(start_tstamp, 4),
        # Record the real completion time (not the start time again).
        "finish": round(finish_tstamp, 4),
        "state": state.dict(),
        "images": all_image_paths,
        "ip": request.client.host,
    }
    write2file(get_log_filename(), json.dumps(log_record) + "\n")
468
+
469
+
470
+ title_html = """
471
+ <h2> <span class="gradient-text" id="text">InternVL2</span><span class="plain-text">: Better than the Best—Expanding Performance Boundaries of Open-Source Multimodal Models with the Progressive Scaling Strategy</span></h2>
472
+ <a href="https://internvl.github.io/blog/2024-07-02-InternVL-2.0/">[📜 InternVL2 Blog]</a>
473
+ <a href="https://huggingface.co/spaces/OpenGVLab/InternVL">[🤗 HF Demo]</a>
474
+ <a href="https://github.com/OpenGVLab/InternVL?tab=readme-ov-file#quick-start-with-huggingface">[🚀 Quick Start]</a>
475
+ <a href="https://github.com/OpenGVLab/InternVL/blob/main/document/How_to_use_InternVL_API.md">[🌐 API]</a>
476
+ """
477
+
478
+ tos_markdown = """
479
+ ### Terms of use
480
+ By using this service, users are required to agree to the following terms:
481
+ The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
482
+ Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
483
+ For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
484
+ """
485
+
486
+
487
+ learn_more_markdown = """
488
+ ### License
489
+ The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
490
+
491
+ ### Acknowledgement
492
+ This demo is modified from LLaVA's demo. Thanks for their awesome work!
493
+ """
494
+ # .gradio-container {margin: 5px 10px 0 10px !important};
495
# Custom CSS handed to gr.Blocks(css=...).
# No `;` may follow a rule's closing brace: a stray one becomes part of the
# next rule's selector and makes the browser drop that rule.
block_css = """
.gradio-container {margin: 0.1% 1% 0 1% !important; max-width: 98% !important;}
#buttons button {
    min-width: min(120px,100%);
}

.gradient-text {
    font-size: 28px;
    width: auto;
    font-weight: bold;
    background: linear-gradient(45deg, red, orange, yellow, green, blue, indigo, violet);
    background-clip: text;
    -webkit-background-clip: text;
    color: transparent;
}

.plain-text {
    font-size: 22px;
    width: auto;
    font-weight: bold;
}
"""
517
+
518
+ js = """
519
+ function createWaveAnimation() {
520
+ const text = document.getElementById('text');
521
+ var i = 0;
522
+ setInterval(function() {
523
+ const colors = [
524
+ 'red, orange, yellow, green, blue, indigo, violet, purple',
525
+ 'orange, yellow, green, blue, indigo, violet, purple, red',
526
+ 'yellow, green, blue, indigo, violet, purple, red, orange',
527
+ 'green, blue, indigo, violet, purple, red, orange, yellow',
528
+ 'blue, indigo, violet, purple, red, orange, yellow, green',
529
+ 'indigo, violet, purple, red, orange, yellow, green, blue',
530
+ 'violet, purple, red, orange, yellow, green, blue, indigo',
531
+ 'purple, red, orange, yellow, green, blue, indigo, violet',
532
+ ];
533
+ const angle = 45;
534
+ const colorIndex = i % colors.length;
535
+ text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`;
536
+ text.style.webkitBackgroundClip = 'text';
537
+ text.style.backgroundClip = 'text';
538
+ text.style.color = 'transparent';
539
+ text.style.fontSize = '28px';
540
+ text.style.width = 'auto';
541
+ text.textContent = 'InternVL2';
542
+ text.style.fontWeight = 'bold';
543
+ i += 1;
544
+ }, 200);
545
+ const params = new URLSearchParams(window.location.search);
546
+ url_params = Object.fromEntries(params);
547
+ // console.log(url_params);
548
+ // console.log('hello world...');
549
+ // console.log(window.location.search);
550
+ // console.log('hello world...');
551
+ // alert(window.location.search)
552
+ // alert(url_params);
553
+ return url_params;
554
+ }
555
+
556
+ """
557
+
558
+
559
def build_demo(embed_mode):
    """Assemble the Gradio Blocks UI and register its event listeners.

    Args:
        embed_mode: When truthy, the title banner and the terms-of-use /
            license sections are left out.

    Returns:
        The constructed ``gr.Blocks`` app (not yet queued or launched).
    """

    def make_slider(label, minimum, maximum, value, step):
        # Every generation parameter uses an interactive slider.
        return gr.Slider(
            minimum=minimum,
            maximum=maximum,
            value=value,
            step=step,
            interactive=True,
            label=label,
        )

    example_rows = [
        [
            {
                "files": ["gallery/prod_9.jpg"],
                "text": "What's at the far end of the image?",
            }
        ],
        [
            {
                "files": ["gallery/astro_on_unicorn.png"],
                "text": "What does this image mean?",
            }
        ],
        [
            {
                "files": ["gallery/prod_12.png"],
                "text": "What are the consequences of the easy decisions shown in this image?",
            }
        ],
        [
            {
                "files": [
                    "gallery/child_1.jpg",
                    "gallery/child_2.jpg",
                    "gallery/child_3.jpg",
                ],
                "text": "这三帧图片讲述了一件什么事情?",
            }
        ],
    ]

    # Created outside the Blocks context and placed later with .render(), so
    # the examples widget (built earlier in the layout) can target it.
    textbox = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image", "video"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )

    with gr.Blocks(
        title="InternVL-Chat",
        theme=gr.themes.Default(),
        css=block_css,
    ) as demo:
        state = gr.State()

        if not embed_mode:
            gr.HTML(title_html)

        with gr.Row():
            # Left column: model choice, prompts, parameters and examples.
            with gr.Column(scale=2):
                with gr.Row(elem_id="model_selector_row"):
                    model_selector = gr.Dropdown(
                        choices=models,
                        value=models[0] if len(models) > 0 else "",
                        interactive=True,
                        show_label=False,
                        container=False,
                    )

                with gr.Accordion("System Prompt", open=False) as system_prompt_row:
                    system_prompt = gr.Textbox(
                        value="请尽可能详细地回答用户的问题。",
                        label="System Prompt",
                        interactive=True,
                    )
                with gr.Accordion("Parameters", open=False) as parameter_row:
                    temperature = make_slider("Temperature", 0.0, 1.0, 0.2, 0.1)
                    top_p = make_slider("Top P", 0.0, 1.0, 0.7, 0.1)
                    repetition_penalty = make_slider(
                        "Repetition penalty", 1.0, 1.5, 1.1, 0.02
                    )
                    max_output_tokens = make_slider(
                        "Max output tokens", 0, 4096, 1024, 64
                    )
                    max_input_tiles = make_slider(
                        "Max input tiles (control the image size)", 1, 32, 12, 1
                    )
                examples = gr.Examples(
                    examples=example_rows,
                    inputs=[textbox],
                )

            # Right column: chat window, input box and action buttons.
            with gr.Column(scale=8):
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    label="InternVL2",
                    height=580,
                    show_copy_button=True,
                    show_share_button=True,
                    avatar_images=[
                        "assets/human.png",
                        "assets/assistant.png",
                    ],
                    bubble_full_width=False,
                )
                with gr.Row():
                    with gr.Column(scale=8):
                        textbox.render()
                    with gr.Column(scale=1, min_width=50):
                        submit_btn = gr.Button(value="Send", variant="primary")
                with gr.Row(elem_id="buttons") as button_row:
                    upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
                    downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
                    flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
                    regenerate_btn = gr.Button(
                        value="🔄 Regenerate", interactive=False
                    )
                    clear_btn = gr.Button(value="🗑️ Clear", interactive=False)

        if not embed_mode:
            gr.Markdown(tos_markdown)
            gr.Markdown(learn_more_markdown)
        url_params = gr.JSON(visible=False)

        # Shared input/output groups for the listeners below.
        btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
        vote_inputs = [state, model_selector]
        vote_outputs = [textbox, upvote_btn, downvote_btn, flag_btn]
        chat_outputs = [state, chatbot, textbox] + btn_list
        bot_inputs = [
            state,
            model_selector,
            temperature,
            top_p,
            repetition_penalty,
            max_output_tokens,
            max_input_tiles,
        ]
        add_text_inputs = [state, textbox, system_prompt, model_selector]
        add_text_outputs = [state, chatbot, textbox, model_selector] + btn_list

        # Register listeners.
        upvote_btn.click(upvote_last_response, vote_inputs, vote_outputs)
        downvote_btn.click(downvote_last_response, vote_inputs, vote_outputs)
        chatbot.like(vote_selected_response, vote_inputs, [])
        flag_btn.click(flag_last_response, vote_inputs, vote_outputs)
        # NOTE(review): `system_prompt` is what reaches regenerate()'s
        # `image_process_mode` parameter here — confirm this is intended.
        regenerate_btn.click(
            regenerate,
            [state, system_prompt],
            chat_outputs,
        ).then(http_bot, bot_inputs, chat_outputs)
        clear_btn.click(clear_history, None, chat_outputs)

        # Pressing Enter in the textbox and clicking "Send" do the same thing.
        for trigger in (textbox.submit, submit_btn.click):
            trigger(add_text, add_text_inputs, add_text_outputs).then(
                http_bot, bot_inputs, chat_outputs
            )

        # NOTE: No demo.load hook is registered. The original author disabled
        # the `args.model_list_mode` dispatch ("once" -> load_demo, "reload" ->
        # load_demo_refresh_model_list) with the remark that it is not
        # triggered when deployed on an HF space.

    return demo
815
 
816
 
817
if __name__ == "__main__":
    # Command-line options for the web server.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)
    parser.add_argument("--controller-url", type=str, default=None)
    parser.add_argument("--concurrency-count", type=int, default=10)
    parser.add_argument(
        "--model-list-mode", type=str, default="reload", choices=["once", "reload"]
    )
    parser.add_argument("--sd-worker-url", type=str, default=None)
    for flag in ("--share", "--moderate", "--embed"):
        parser.add_argument(flag, action="store_true")
    args = parser.parse_args()
    logger.info(f"args: {args}")

    # Fall back to the CONTROLLER_URL environment variable when the flag is
    # not given; one of the two must supply a value.
    args.controller_url = args.controller_url or os.environ.get("CONTROLLER_URL", None)
    if not args.controller_url:
        raise ValueError("controller-url is required.")

    # Module-level names read by the functions above: `models` by load_demo
    # and build_demo, `sd_worker_url` by http_bot.
    models = get_model_list()
    sd_worker_url = args.sd_worker_url
    logger.info(args)

    demo = build_demo(args.embed)
    queued_demo = demo.queue(api_open=False)
    queued_demo.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        max_threads=args.concurrency_count,
    )
gradio_web_server.py CHANGED
@@ -817,11 +817,11 @@ def build_demo(embed_mode):
817
  if __name__ == "__main__":
818
  parser = argparse.ArgumentParser()
819
  parser.add_argument("--host", type=str, default="0.0.0.0")
820
- parser.add_argument("--port", type=int, default=11000)
821
  parser.add_argument("--controller-url", type=str, default=None)
822
  parser.add_argument("--concurrency-count", type=int, default=10)
823
  parser.add_argument(
824
- "--model-list-mode", type=str, default="once", choices=["once", "reload"]
825
  )
826
  parser.add_argument("--sd-worker-url", type=str, default=None)
827
  parser.add_argument("--share", action="store_true")
 
817
  if __name__ == "__main__":
818
  parser = argparse.ArgumentParser()
819
  parser.add_argument("--host", type=str, default="0.0.0.0")
820
+ parser.add_argument("--port", type=int, default=7860)
821
  parser.add_argument("--controller-url", type=str, default=None)
822
  parser.add_argument("--concurrency-count", type=int, default=10)
823
  parser.add_argument(
824
+ "--model-list-mode", type=str, default="reload", choices=["once", "reload"]
825
  )
826
  parser.add_argument("--sd-worker-url", type=str, default=None)
827
  parser.add_argument("--share", action="store_true")