YangWu001 committed on
Commit 6330f37 · 1 Parent(s): cd040c9
Files changed (2)
  1. app.py +9 -5
  2. app_interface.py +141 -0
app.py CHANGED
@@ -37,7 +37,7 @@ def respond(
         messages.append({"role": "user", "content": message})

         response = ""
-        for message in client.chat_completion(
+        for message_chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
             stream=True,
@@ -45,14 +45,17 @@ def respond(
             top_p=top_p,
         ):
             if stop_inference:
-                history.append((message, "Inference cancelled."))
-                yield history
+                response = "Inference cancelled."
                 break
-            token = message.choices[0].delta.content
+            token = message_chunk.choices[0].delta.content
             response += token
-            history.append((message, response))
+            history[-1] = (message, response)
             yield history

+        # Finalize response in history
+        history.append((message, response))
+        yield history
+
 def cancel_inference():
     global stop_inference
     stop_inference = True
@@ -124,6 +127,7 @@ with gr.Blocks(css=custom_css) as demo:
     cancel_button = gr.Button("Cancel Inference", variant="danger")

     def chat_fn(message, history):
+        history.append((message, ""))  # Initialize with empty response
         return respond(
             message,
             history,
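
The app.py change above does three things: the stream's loop variable is renamed to message_chunk so it no longer shadows the user's message, each chunk rewrites the history entry seeded by chat_fn in place instead of appending a new (message, response) pair per token, and cancellation now just sets the response text before breaking. Below is a minimal, self-contained sketch of that update-in-place pattern, with a stubbed stream standing in for client.chat_completion(..., stream=True); the Fake* names, fake_stream, and respond_sketch are illustrative stand-ins, not names from the commit.

# Minimal sketch of the update-in-place streaming pattern adopted above.
# A stubbed stream stands in for client.chat_completion(..., stream=True).
from dataclasses import dataclass

@dataclass
class FakeDelta:
    content: str

@dataclass
class FakeChoice:
    delta: FakeDelta

@dataclass
class FakeChunk:
    choices: list

def fake_stream(text):
    # Yield one chunk per word, mimicking a token stream.
    for word in text.split():
        yield FakeChunk(choices=[FakeChoice(delta=FakeDelta(content=word + " "))])

def respond_sketch(message, history):
    history.append((message, ""))  # seeded by chat_fn in the committed code
    response = ""
    for message_chunk in fake_stream("a streamed reply"):
        token = message_chunk.choices[0].delta.content
        response += token
        history[-1] = (message, response)  # rewrite in place: no per-token rows
        yield history
    yield history  # final snapshot; history[-1] already holds the full pair

history = []
for snapshot in respond_sketch("hi", history):
    print(snapshot[-1])

One caveat: in the committed version the post-loop history.append((message, response)) adds the finished pair a second time, since history[-1] was already updated in place; the sketch above yields the final snapshot instead.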
app_interface.py ADDED
@@ -0,0 +1,141 @@
+import gradio as gr
+from huggingface_hub import InferenceClient
+import time
+
+# Inference client setup
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+# Global flag to handle cancellation
+stop_inference = False
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    use_local_model,
+):
+    global stop_inference
+    stop_inference = False  # Reset cancellation flag
+
+    if use_local_model:
+        # Simulate local inference
+        time.sleep(2)  # simulate a delay
+        response = "This is a response from the local model."
+        history.append((message, response))
+        yield history
+    else:
+        # API-based inference
+        messages = [{"role": "system", "content": system_message}]
+        for val in history:
+            if val[0]:
+                messages.append({"role": "user", "content": val[0]})
+            if val[1]:
+                messages.append({"role": "assistant", "content": val[1]})
+        messages.append({"role": "user", "content": message})
+
+        response = ""
+        for message in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            if stop_inference:
+                history.append((message, "Inference cancelled."))
+                yield history
+                break
+            token = message.choices[0].delta.content
+            response += token
+            history.append((message, response))
+            yield history
+
+def cancel_inference():
+    global stop_inference
+    stop_inference = True
+
+# Custom CSS for a fancy look
+custom_css = """
+#main-container {
+    background-color: #f0f0f0;
+    font-family: 'Arial', sans-serif;
+}
+
+.gradio-container {
+    max-width: 700px;
+    margin: 0 auto;
+    padding: 20px;
+    background: white;
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+    border-radius: 10px;
+}
+
+.gr-button {
+    background-color: #4CAF50;
+    color: white;
+    border: none;
+    border-radius: 5px;
+    padding: 10px 20px;
+    cursor: pointer;
+    transition: background-color 0.3s ease;
+}
+
+.gr-button:hover {
+    background-color: #45a049;
+}
+
+.gr-slider input {
+    color: #4CAF50;
+}
+
+.gr-chat {
+    font-size: 16px;
+}
+
+#title {
+    text-align: center;
+    font-size: 2em;
+    margin-bottom: 20px;
+    color: #333;
+}
+"""
+
+# Define the interface
+with gr.Blocks(css=custom_css) as demo:
+    gr.Markdown("<h1 style='text-align: center;'>🌟 Fancy AI Chatbot 🌟</h1>")
+    gr.Markdown("Interact with the AI chatbot using customizable settings below.")
+
+    with gr.Row():
+        system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message", interactive=True)
+        use_local_model = gr.Checkbox(label="Use Local Model", value=False)
+
+    with gr.Row():
+        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    chat_history = gr.Chatbot(label="Chat")
+
+    user_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
+
+    cancel_button = gr.Button("Cancel Inference", variant="danger")
+
+    def chat_fn(message, history):
+        return respond(
+            message,
+            history,
+            system_message.value,
+            max_tokens.value,
+            temperature.value,
+            top_p.value,
+            use_local_model.value,
+        )
+
+    user_input.submit(chat_fn, [user_input, chat_history], chat_history)
+    cancel_button.click(cancel_inference)
+
+if __name__ == "__main__":
+    demo.launch(share=True)
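
app_interface.py wires cancel_button.click(cancel_inference) to a module-level stop_inference flag that the streaming loop polls on every chunk. Here is a minimal sketch of that mechanism outside Gradio; stream_numbers is an illustrative stand-in for respond(), not part of the commit.

# Minimal sketch of the cancellation mechanism used above: a module-level
# flag flipped by one callback and polled by the streaming generator.
import itertools

stop_inference = False

def cancel_inference():
    global stop_inference
    stop_inference = True

def stream_numbers():
    global stop_inference
    stop_inference = False  # reset on entry, as respond() does
    for i in itertools.count():
        if stop_inference:
            yield "Inference cancelled."
            break
        yield str(i)

gen = stream_numbers()
print(next(gen), next(gen))  # 0 1
cancel_inference()
print(next(gen))             # Inference cancelled.

Two caveats worth keeping in mind: a single module-level flag is shared by every concurrent session, so one user's cancel stops all in-flight generations; and with stream=True a chunk's delta.content may be None on some chunks (depending on the backend), in which case response += token would raise a TypeError unless guarded with something like token or "".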