mlabonne committed on
Commit
209aee6
·
1 Parent(s): 044264a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -21,7 +21,7 @@ model = AutoModelForCausalLM.from_pretrained(
21
  # Defining a custom stopping criteria class for the model's text generation.
22
  class StopOnTokens(StoppingCriteria):
23
  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
24
- stop_ids = [2] # IDs of tokens where the generation should stop.
25
  for stop_id in stop_ids:
26
  if input_ids[0][-1] == stop_id: # Checking if the last generated token is a stop token.
27
  return True
@@ -36,6 +36,7 @@ def predict(message, history):
36
  # Formatting the input for the model.
37
  system_prompt = "<|im_start|>system\nYou are Phixtral, a helpful AI assistant.<|im_end|>"
38
  messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
 
39
  input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
40
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
41
  generate_kwargs = dict(
 
21
  # Defining a custom stopping criteria class for the model's text generation.
22
  class StopOnTokens(StoppingCriteria):
23
  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
24
+ stop_ids = [50256, 50295] # IDs of tokens where the generation should stop.
25
  for stop_id in stop_ids:
26
  if input_ids[0][-1] == stop_id: # Checking if the last generated token is a stop token.
27
  return True
 
36
  # Formatting the input for the model.
37
  system_prompt = "<|im_start|>system\nYou are Phixtral, a helpful AI assistant.<|im_end|>"
38
  messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
39
+ print(messages)
40
  input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
41
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
42
  generate_kwargs = dict(