Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -15,12 +15,12 @@ from io import BytesIO
|
|
15 |
|
16 |
from serve.conversation import (default_conversation, conv_templates, SeparatorStyle)
|
17 |
from serve.constants import LOGDIR
|
18 |
-
from serve.utils import (build_logger, server_error_msg, violates_moderation, moderation_msg)
|
19 |
import subprocess
|
20 |
|
21 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
22 |
|
23 |
-
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
24 |
|
25 |
headers = {"User-Agent": "Bunny Client"}
|
26 |
|
@@ -82,7 +82,7 @@ def get_model_list():
|
|
82 |
ret = requests.post(args.controller_url + "/list_models")
|
83 |
models = ret.json()["models"]
|
84 |
models.sort(key=lambda x: priority.get(x, x))
|
85 |
-
logger.info(f"Models: {models}")
|
86 |
return models
|
87 |
|
88 |
|
@@ -97,7 +97,7 @@ function() {
|
|
97 |
|
98 |
|
99 |
def load_demo(url_params, request: gr.Request):
|
100 |
-
logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
101 |
|
102 |
dropdown_update = gr.update(visible=True)
|
103 |
if "model" in url_params:
|
@@ -111,7 +111,7 @@ def load_demo(url_params, request: gr.Request):
|
|
111 |
|
112 |
|
113 |
def load_demo_refresh_model_list(request: gr.Request):
|
114 |
-
logger.info(f"load_demo. ip: {request.client.host}")
|
115 |
models = get_model_list()
|
116 |
state = default_conversation.copy()
|
117 |
dropdown_update = gr.update(
|
@@ -134,25 +134,25 @@ def vote_last_response(state, vote_type, model_selector, request: gr.Request):
|
|
134 |
|
135 |
|
136 |
def upvote_last_response(state, model_selector, request: gr.Request):
|
137 |
-
logger.info(f"upvote. ip: {request.client.host}")
|
138 |
vote_last_response(state, "upvote", model_selector, request)
|
139 |
return ("",) + (disable_btn,) * 3
|
140 |
|
141 |
|
142 |
def downvote_last_response(state, model_selector, request: gr.Request):
|
143 |
-
logger.info(f"downvote. ip: {request.client.host}")
|
144 |
vote_last_response(state, "downvote", model_selector, request)
|
145 |
return ("",) + (disable_btn,) * 3
|
146 |
|
147 |
|
148 |
def flag_last_response(state, model_selector, request: gr.Request):
|
149 |
-
logger.info(f"flag. ip: {request.client.host}")
|
150 |
vote_last_response(state, "flag", model_selector, request)
|
151 |
return ("",) + (disable_btn,) * 3
|
152 |
|
153 |
|
154 |
def regenerate(state, image_process_mode, request: gr.Request):
|
155 |
-
logger.info(f"regenerate. ip: {request.client.host}")
|
156 |
state.messages[-1][-1] = None
|
157 |
prev_human_msg = state.messages[-2]
|
158 |
if type(prev_human_msg[1]) in (tuple, list):
|
@@ -162,7 +162,7 @@ def regenerate(state, image_process_mode, request: gr.Request):
|
|
162 |
|
163 |
|
164 |
def clear_history(request: gr.Request):
|
165 |
-
logger.info(f"clear_history. ip: {request.client.host}")
|
166 |
state = default_conversation.copy()
|
167 |
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
|
168 |
|
@@ -196,7 +196,7 @@ def save_conversation(conversation):
|
|
196 |
|
197 |
|
198 |
def add_text(state, text, image, image_process_mode, request: gr.Request):
|
199 |
-
logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
|
200 |
if len(text) <= 0 and image is None:
|
201 |
state.skip_next = True
|
202 |
return (state, state.to_gradio_chatbot(), "", None) + (no_change_btn,) * 5
|
@@ -216,7 +216,7 @@ def add_text(state, text, image, image_process_mode, request: gr.Request):
|
|
216 |
text = (text, image, image_process_mode)
|
217 |
if len(state.get_images(return_pil=True)) > 0:
|
218 |
state = default_conversation.copy()
|
219 |
-
logger.info(f"Input Text: {text}")
|
220 |
state.append_message(state.roles[0], text)
|
221 |
state.append_message(state.roles[1], None)
|
222 |
state.skip_next = False
|
@@ -224,7 +224,7 @@ def add_text(state, text, image, image_process_mode, request: gr.Request):
|
|
224 |
|
225 |
|
226 |
def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
|
227 |
-
logger.info(f"http_bot. ip: {request.client.host}")
|
228 |
start_tstamp = time.time()
|
229 |
model_name = model_selector
|
230 |
|
@@ -240,13 +240,13 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
240 |
new_state.append_message(new_state.roles[1], None)
|
241 |
state = new_state
|
242 |
|
243 |
-
logger.info(f"Processed Input Text: {state.messages[-2][1]}")
|
244 |
# Query worker address
|
245 |
controller_url = args.controller_url
|
246 |
ret = requests.post(controller_url + "/get_worker_address",
|
247 |
json={"model": model_name})
|
248 |
worker_addr = ret.json()["address"]
|
249 |
-
logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")
|
250 |
|
251 |
# No available worker
|
252 |
if worker_addr == "":
|
@@ -276,7 +276,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
276 |
"stop": '<|im_end|>', #state.sep if state.sep_style in [SeparatorStyle.PLAIN, ] else state.sep2,
|
277 |
"images": f'List of {len(state.get_images())} images: {all_image_hash}',
|
278 |
}
|
279 |
-
logger.info(f"==== request ====\n{pload}")
|
280 |
|
281 |
pload['images'] = state.get_images()
|
282 |
print('=========> get_images')
|
@@ -313,7 +313,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
313 |
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
314 |
|
315 |
finish_tstamp = time.time()
|
316 |
-
logger.info(f"{output}")
|
317 |
|
318 |
with open(get_conv_log_filename(), "a") as fout:
|
319 |
data = {
|
@@ -519,14 +519,15 @@ if __name__ == "__main__":
|
|
519 |
parser.add_argument("--concurrency-count", type=int, default=10)
|
520 |
parser.add_argument("--model-list-mode", type=str, default="once",
|
521 |
choices=["once", "reload"])
|
|
|
522 |
parser.add_argument("--share", action="store_true")
|
523 |
parser.add_argument("--moderate", action="store_true")
|
524 |
parser.add_argument("--embed", action="store_true")
|
525 |
args = parser.parse_args()
|
526 |
-
logger.info(f"args: {args}")
|
527 |
|
528 |
models = get_model_list()
|
529 |
-
logger.info(args)
|
530 |
|
531 |
concurrency_count = int(os.getenv("concurrency_count", 5))
|
532 |
|
|
|
15 |
|
16 |
from serve.conversation import (default_conversation, conv_templates, SeparatorStyle)
|
17 |
from serve.constants import LOGDIR
|
18 |
+
# build_logger stays in the import list (unused while logging is disabled) so the
# parenthesised import remains syntactically valid; a `#` inside the name would
# comment out the rest of the line and leave the parenthesis unclosed.
from serve.utils import (build_logger, server_error_msg, violates_moderation, moderation_msg)
|
19 |
import subprocess
|
20 |
|
21 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
22 |
|
23 |
+
# logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
24 |
|
25 |
headers = {"User-Agent": "Bunny Client"}
|
26 |
|
|
|
82 |
ret = requests.post(args.controller_url + "/list_models")
|
83 |
models = ret.json()["models"]
|
84 |
models.sort(key=lambda x: priority.get(x, x))
|
85 |
+
#logger.info(f"Models: {models}")
|
86 |
return models
|
87 |
|
88 |
|
|
|
97 |
|
98 |
|
99 |
def load_demo(url_params, request: gr.Request):
|
100 |
+
#logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
101 |
|
102 |
dropdown_update = gr.update(visible=True)
|
103 |
if "model" in url_params:
|
|
|
111 |
|
112 |
|
113 |
def load_demo_refresh_model_list(request: gr.Request):
|
114 |
+
#logger.info(f"load_demo. ip: {request.client.host}")
|
115 |
models = get_model_list()
|
116 |
state = default_conversation.copy()
|
117 |
dropdown_update = gr.update(
|
|
|
134 |
|
135 |
|
136 |
def upvote_last_response(state, model_selector, request: gr.Request):
|
137 |
+
#logger.info(f"upvote. ip: {request.client.host}")
|
138 |
vote_last_response(state, "upvote", model_selector, request)
|
139 |
return ("",) + (disable_btn,) * 3
|
140 |
|
141 |
|
142 |
def downvote_last_response(state, model_selector, request: gr.Request):
|
143 |
+
#logger.info(f"downvote. ip: {request.client.host}")
|
144 |
vote_last_response(state, "downvote", model_selector, request)
|
145 |
return ("",) + (disable_btn,) * 3
|
146 |
|
147 |
|
148 |
def flag_last_response(state, model_selector, request: gr.Request):
|
149 |
+
#logger.info(f"flag. ip: {request.client.host}")
|
150 |
vote_last_response(state, "flag", model_selector, request)
|
151 |
return ("",) + (disable_btn,) * 3
|
152 |
|
153 |
|
154 |
def regenerate(state, image_process_mode, request: gr.Request):
|
155 |
+
#logger.info(f"regenerate. ip: {request.client.host}")
|
156 |
state.messages[-1][-1] = None
|
157 |
prev_human_msg = state.messages[-2]
|
158 |
if type(prev_human_msg[1]) in (tuple, list):
|
|
|
162 |
|
163 |
|
164 |
def clear_history(request: gr.Request):
|
165 |
+
#logger.info(f"clear_history. ip: {request.client.host}")
|
166 |
state = default_conversation.copy()
|
167 |
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
|
168 |
|
|
|
196 |
|
197 |
|
198 |
def add_text(state, text, image, image_process_mode, request: gr.Request):
|
199 |
+
#logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
|
200 |
if len(text) <= 0 and image is None:
|
201 |
state.skip_next = True
|
202 |
return (state, state.to_gradio_chatbot(), "", None) + (no_change_btn,) * 5
|
|
|
216 |
text = (text, image, image_process_mode)
|
217 |
if len(state.get_images(return_pil=True)) > 0:
|
218 |
state = default_conversation.copy()
|
219 |
+
#logger.info(f"Input Text: {text}")
|
220 |
state.append_message(state.roles[0], text)
|
221 |
state.append_message(state.roles[1], None)
|
222 |
state.skip_next = False
|
|
|
224 |
|
225 |
|
226 |
def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
|
227 |
+
#logger.info(f"http_bot. ip: {request.client.host}")
|
228 |
start_tstamp = time.time()
|
229 |
model_name = model_selector
|
230 |
|
|
|
240 |
new_state.append_message(new_state.roles[1], None)
|
241 |
state = new_state
|
242 |
|
243 |
+
#logger.info(f"Processed Input Text: {state.messages[-2][1]}")
|
244 |
# Query worker address
|
245 |
controller_url = args.controller_url
|
246 |
ret = requests.post(controller_url + "/get_worker_address",
|
247 |
json={"model": model_name})
|
248 |
worker_addr = ret.json()["address"]
|
249 |
+
#logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")
|
250 |
|
251 |
# No available worker
|
252 |
if worker_addr == "":
|
|
|
276 |
"stop": '<|im_end|>', #state.sep if state.sep_style in [SeparatorStyle.PLAIN, ] else state.sep2,
|
277 |
"images": f'List of {len(state.get_images())} images: {all_image_hash}',
|
278 |
}
|
279 |
+
#logger.info(f"==== request ====\n{pload}")
|
280 |
|
281 |
pload['images'] = state.get_images()
|
282 |
print('=========> get_images')
|
|
|
313 |
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
314 |
|
315 |
finish_tstamp = time.time()
|
316 |
+
#logger.info(f"{output}")
|
317 |
|
318 |
with open(get_conv_log_filename(), "a") as fout:
|
319 |
data = {
|
|
|
519 |
parser.add_argument("--concurrency-count", type=int, default=10)
|
520 |
parser.add_argument("--model-list-mode", type=str, default="once",
|
521 |
choices=["once", "reload"])
|
522 |
+
parser.add_argument("--controller-url", type=str, default="http://localhost:10000")
|
523 |
parser.add_argument("--share", action="store_true")
|
524 |
parser.add_argument("--moderate", action="store_true")
|
525 |
parser.add_argument("--embed", action="store_true")
|
526 |
args = parser.parse_args()
|
527 |
+
#logger.info(f"args: {args}")
|
528 |
|
529 |
models = get_model_list()
|
530 |
+
#logger.info(args)
|
531 |
|
532 |
concurrency_count = int(os.getenv("concurrency_count", 5))
|
533 |
|