Spaces:

Mikhil-jivus
/

EndpointTesting

Runtime error

App Files Community

Mikhil-jivus committed on Oct 4, 2024

Commit

b17ecc2

verified ·

1 Parent(s): ae4333a

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -15

app.py CHANGED Viewed

@@ -18,6 +18,16 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto"  # Automatically use available GPU/CPU efficiently
 )
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -26,18 +36,17 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
     # Tokenize the input messages
-    input_text = system_message + " ".join([f"{msg['role']}: {msg['content']}" for msg in messages])
     input_ids = tokenizer.encode(input_text, return_tensors="pt")
     # Move input_ids to the GPU
@@ -60,15 +69,19 @@ def respond(
     # Decode the response
     response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
-    yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -82,4 +95,4 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
-    demo.launch(share=True)

     device_map="auto"  # Automatically use available GPU/CPU efficiently
 )
def clean_response(response, history):
    """Strip an echo of the previous assistant reply out of ``response``.

    Args:
        response: The newly decoded model output (a string).
        history: Sequence of ``(user_message, bot_response)`` pairs for the
            conversation so far; may be empty.

    Returns:
        ``response`` unchanged when there is no history, when the last pair
        has no bot reply (``None``), or when that reply does not occur in
        ``response``. Otherwise ``response`` with every occurrence of the
        last bot reply removed and surrounding whitespace stripped.
    """
    if not history:
        return response

    # Only the most recent turn is checked; the user half of the pair is
    # not needed here.
    _, last_bot_response = history[-1]

    # A pair may carry no bot reply; `None in response` would raise
    # TypeError, so there is nothing to remove in that case.
    if last_bot_response is None:
        return response

    if last_bot_response in response:
        response = response.replace(last_bot_response, "").strip()
    return response
 def respond(
     message,
     history: list[tuple[str, str]],
     temperature,
     top_p,
 ):
+    # Add system prompt only once at the beginning of the conversation
+    if len(history) == 0:
+        input_text = f"system: {system_message}\nuser: {message}\n"
+    else:
+        input_text = f"user: {message}\n"
+    # Append previous conversation history to the input text
+    for user_msg, bot_msg in history:
+        input_text += f"user: {user_msg}\nassistant: {bot_msg}\n"
     # Tokenize the input messages
     input_ids = tokenizer.encode(input_text, return_tensors="pt")
     # Move input_ids to the GPU
     # Decode the response
     response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
+    # Clean the response to remove any repeated or unnecessary text
+    response = clean_response(response, history)
+    # Update history with the new user message and bot response
+    history.append((message, response))
+    return response
+# Set up the Gradio app interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are a helpful and friendly assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
 )
 if __name__ == "__main__":
+    demo.launch(share=True)