damand2061 committed
Commit 1cb464c · verified · 1 Parent(s): 490cb72

Update app.py

Files changed (1): app.py (+71, -38)
app.py CHANGED
@@ -1,64 +1,97 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 
 def respond(
     message,
     history: list[tuple[str, str]],
-    system_message,
     max_tokens,
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
         ),
     ],
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
 
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+# Create an InferenceClient dynamically based on the selected model
+def get_client(model_name):
+    return InferenceClient(model_name)
 
 def respond(
     message,
     history: list[tuple[str, str]],
     max_tokens,
     temperature,
     top_p,
+    model_name,  # Added model_name to the function arguments
 ):
+    # Statically defined system message
+    system_message = "You are a friendly Chatbot."
+
+    # Create a client for the selected model
+    client = get_client(model_name)
+
+    # The Cendol mT5 models do not work with the structured chat-completion format
+    if model_name in ["indonlp/cendol-mt5-small-inst", "indonlp/cendol-mt5-small-chat"]:
+        # For these models, concatenate the conversation into a single string
+        history_str = ""
+        for user_msg, assistant_msg in history:
+            if user_msg:
+                history_str += f"{user_msg}\n"
+            if assistant_msg:
+                history_str += f"{assistant_msg}\n"
+
+        # Add the latest user message
+        history_str += f"{message}\n"
+
+        # Pass the entire conversation history as a plain-text prompt
+        response = client.text_generation(
+            history_str,  # Single string as input
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+        )
+
+        # The response is already a string, so return it directly
+        full_response = response
+    else:
+        # For other models, use the structured chat format with roles
+        messages = [{"role": "system", "content": system_message}]
+        for val in history:
+            if val[0]:
+                messages.append({"role": "user", "content": val[0]})
+            if val[1]:
+                messages.append({"role": "assistant", "content": val[1]})
+
+        # Add the latest user message
+        messages.append({"role": "user", "content": message})
+
+        # Make the request
+        response = client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stream=False,
+        )
+
+        # Extract the full response for chat models
+        full_response = response.choices[0].message["content"]
+
+    return full_response
 
+# Gradio ChatInterface setup with a static system message and no system-message Textbox
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
+            value=0.95, step=0.05, label="Top-p (nucleus sampling)",
         ),
+        # Dropdown to select the model
+        gr.Dropdown(
+            choices=[
+                "meta-llama/Meta-Llama-3-8B-Instruct",
+                "mistralai/Mistral-7B-Instruct-v0.3",
+                "HuggingFaceH4/zephyr-7b-beta",
+            ],
+            value="meta-llama/Meta-Llama-3-8B-Instruct",
+            label="Choose Model",
+        ),
     ],
 )
 
 if __name__ == "__main__":
+    demo.launch()
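
The plain-text branch is the crux of this change, and it can be reviewed without calling the Inference API by replaying the history-flattening logic on its own. A minimal sketch follows; the sample conversation and message are invented for illustration, while the loop body is copied from the new respond():

# Reproduce the prompt flattening used for the Cendol mT5 models.
# The conversation below is a hypothetical stand-in for real chat history.
history = [("Hi there!", "Hello! How can I help you?")]
message = "Tell me about Borobudur."

history_str = ""
for user_msg, assistant_msg in history:
    if user_msg:
        history_str += f"{user_msg}\n"
    if assistant_msg:
        history_str += f"{assistant_msg}\n"
history_str += f"{message}\n"

print(history_str)
# Hi there!
# Hello! How can I help you?
# Tell me about Borobudur.

Note that the flattened prompt carries no role markers, so user and assistant turns reach the model as undifferentiated lines; that is a deliberate simplification for models without a chat template. Note also that gr.ChatInterface passes additional_inputs to the callback positionally after message and history, which is why the new gr.Dropdown is listed last: it maps onto the trailing model_name parameter of respond().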