Add alternative and derail protection.
app.py
CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-import os, gc, torch
+import os, copy, gc, torch
 from datetime import datetime
 from huggingface_hub import hf_hub_download
 from pynvml import *
@@ -123,6 +123,15 @@ def user(message, chatbot):
     print(f"User: {message}")
     return "", chatbot + [[message, None]]
 
+def alternative(chatbot, history):
+    if not chatbot or not history:
+        return chatbot, history
+
+    chatbot[-1][1] = None
+    history[0] = copy.deepcopy(history[1])
+
+    return chatbot, history
+
 def chat(
     prompt,
     user,
@@ -139,6 +148,9 @@ def chat(
         alpha_presence=float(presence_penalty),
         token_ban=[], # ban the generation of some tokens
         token_stop=[]) # stop generation whenever you see any token here
+
+    if not chatbot:
+        return chatbot, history
 
     message = chatbot[-1][0]
     message = message.strip().replace('\r\n','\n').replace('\n\n','\n')
@@ -154,11 +166,14 @@ def chat(
         prompt = f"\n{prompt}\n\n"
 
         out, state = model.forward(pipeline.encode(prompt), None)
-        history = [state, []]
+        history = [state, None, []] # [state, state_pre, tokens]
         print("History reloaded.")
 
-    [state, all_tokens] = history
+    [state, _, all_tokens] = history
+    state_pre_0 = copy.deepcopy(state)
+
     out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:], state)
+    state_pre_1 = copy.deepcopy(state) # For recovery
 
     print("Bot: ", end='')
 
@@ -208,11 +223,27 @@ def chat(
         if '\n\n' in out_str:
             break
 
+        # State recovery
+        if f'{user}:' in out_str or f'{bot}:' in out_str:
+            idx_user = out_str.find(f'{user}:')
+            idx_user = len(out_str) if idx_user == -1 else idx_user
+            idx_bot = out_str.find(f'{bot}:')
+            idx_bot = len(out_str) if idx_bot == -1 else idx_bot
+            idx = min(idx_user, idx_bot)
+
+            if idx < len(out_str):
+                out_str = f" {out_str[:idx].strip()}\n\n"
+                tokens = pipeline.encode(out_str)
+
+                all_tokens = all_tokens[:begin] + tokens
+                out, state = model.forward(tokens, state_pre_1)
+                break
+
     gc.collect()
     torch.cuda.empty_cache()
 
     chatbot[-1][1] = out_str.strip()
-    history = [state, all_tokens]
+    history = [state, state_pre_0, all_tokens]
     yield chatbot, history
 
 with gr.Blocks(title=title) as demo:
@@ -245,6 +276,7 @@ with gr.Blocks(title=title) as demo:
             message = gr.Textbox(label="Message")
             with gr.Row():
                 send = gr.Button("Send", variant="primary")
+                alt = gr.Button("Alternative", variant="secondary")
                 clear = gr.Button("Clear", variant="secondary")
         with gr.Column():
             with gr.Row():
@@ -269,6 +301,7 @@ with gr.Blocks(title=title) as demo:
     chat_outputs = [chatbot, state]
     message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
     send.click(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
+    alt.click(alternative, [chatbot, state], [chatbot, state], queue=False).then(chat, chat_inputs, chat_outputs)
     clear.click(lambda: ([], None, ""), [], [chatbot, state, message], queue=False)
 
 demo.queue(max_size=10)
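
Note on the "Alternative" button: the history kept in the Gradio state component now carries two RWKV states instead of one, laid out as [state, state_pre, tokens]. state_pre_0 is a deep copy taken before the latest user turn (ctx) is fed to the model, and it is written back as history[1] once the reply is done. The alternative callback clears the last bot reply and rewinds history[0] to that saved copy, and the chained .then(chat, ...) then regenerates the answer from the restored state. A rough standalone sketch of just that rewind, with plain lists standing in for the RWKV state tensors (the stand-in values are illustrative only):

import copy

def alternative(chatbot, history):
    # history follows the commit's layout: [state, state_pre, tokens]
    if not chatbot or not history:
        return chatbot, history
    chatbot[-1][1] = None                   # drop the previous bot reply
    history[0] = copy.deepcopy(history[1])  # rewind to the pre-reply state
    return chatbot, history

chatbot = [["Hi", "Hello!"]]
history = [[1, 2, 3], [1, 2], []]           # stand-in states, not real tensors
chatbot, history = alternative(chatbot, history)
assert chatbot[-1][1] is None and history[0] == [1, 2]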
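Note on the derail protection ("State recovery"): state_pre_1 is a second deep copy taken right after the user's turn has been forwarded but before any reply tokens are sampled. If the generated text starts writing a new speaker turn on its own, i.e. it contains f'{user}:' or f'{bot}:', the reply is cut at the first such marker, the truncated text is re-encoded, and the model is re-forwarded from state_pre_1, so the cached state and all_tokens match exactly the text that ends up in the chatbot. A minimal sketch of just the truncation step (the helper name and sample string are illustrative, not from the Space):

def truncate_at_speaker(out_str, user, bot):
    # Cut the reply where the model starts speaking for either side.
    idx_user = out_str.find(f'{user}:')
    idx_user = len(out_str) if idx_user == -1 else idx_user
    idx_bot = out_str.find(f'{bot}:')
    idx_bot = len(out_str) if idx_bot == -1 else idx_bot
    idx = min(idx_user, idx_bot)
    if idx < len(out_str):
        return f" {out_str[:idx].strip()}\n\n", True   # truncated: re-forward from the saved state
    return out_str, False

print(truncate_at_speaker("Sure, here you go.\nUser: thanks!", "User", "Bot"))
# (' Sure, here you go.\n\n', True)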