Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -53,22 +53,29 @@ def respond(
|
|
53 |
messages.append({"role": "user", "content": message})
|
54 |
|
55 |
response = ""
|
56 |
-
|
57 |
print("Sending request to Hugging Face API.")
|
58 |
|
59 |
-
|
60 |
model=model_name,
|
61 |
messages=messages,
|
62 |
max_tokens=max_tokens,
|
63 |
temperature=temperature,
|
64 |
top_p=top_p,
|
65 |
stream=True
|
66 |
-
)
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
print("Completed response generation.")
|
74 |
|
|
|
53 |
messages.append({"role": "user", "content": message})
|
54 |
|
55 |
response = ""
|
|
|
56 |
print("Sending request to Hugging Face API.")
|
57 |
|
58 |
+
stream = client.chat.completions.create(
|
59 |
model=model_name,
|
60 |
messages=messages,
|
61 |
max_tokens=max_tokens,
|
62 |
temperature=temperature,
|
63 |
top_p=top_p,
|
64 |
stream=True
|
65 |
+
)
|
66 |
+
|
67 |
+
for chunk in stream:
|
68 |
+
try:
|
69 |
+
# Handle Hugging Face's streaming format
|
70 |
+
token = chunk.choices[0].delta.content
|
71 |
+
|
72 |
+
if token: # Skip empty tokens
|
73 |
+
response += token
|
74 |
+
yield response
|
75 |
+
print(f"Streamed token: {token}")
|
76 |
+
except AttributeError as e:
|
77 |
+
print(f"Error processing chunk: {e}")
|
78 |
+
continue
|
79 |
|
80 |
print("Completed response generation.")
|
81 |
|