try-this-model

Running

wxgeorge committed on Sep 6

Commit

4c36b18

•

1 Parent(s): 30bad6e

:poop: Cheesy "de-chat-formatting" of the response: strip the leading assistant header from the streamed output.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ def respond(message, history, model):
         history_openai_format.append({"role": "assistant", "content":assistant})
     history_openai_format.append({"role": "user", "content": message})
     if model == "mattshumer/Reflection-Llama-3.1-70B":
         # chat/completions not working for this model;
         # apply chat template locally
@@ -46,7 +47,8 @@ def respond(message, history, model):
             #     debugger_ran = True
             if chunk.choices[0].text is not None:
                 partial_message = partial_message + chunk.choices[0].text
-                yield partial_message
     else:
         response = client.chat.completions.create(
             model=model,

         history_openai_format.append({"role": "assistant", "content":assistant})
     history_openai_format.append({"role": "user", "content": message})
     if model == "mattshumer/Reflection-Llama-3.1-70B":
         # chat/completions not working for this model;
         # apply chat template locally
             #     debugger_ran = True
             if chunk.choices[0].text is not None:
                 partial_message = partial_message + chunk.choices[0].text
+                prefix_to_strip = "<|start_header_id|>assistant<|end_header_id|>\n\n"
+                yield partial_message[len(prefix_to_strip):]
     else:
         response = client.chat.completions.create(
             model=model,