Spaces:

randomblock1
/

phi-2

Sleeping

App Files Files Community

Benjamin Gonzalez committed on Dec 15, 2023

Commit

c4f947a

1 Parent(s): 62a0c90

try to implement streaming

Browse files

Files changed (1) hide show

app.py +44 -7

app.py CHANGED Viewed

@@ -1,5 +1,11 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 if torch.cuda.is_available():
@@ -13,11 +19,39 @@ model = AutoModelForCausalLM.from_pretrained(
 )
-def generate(prompt, length):
-    """Generate a completion for ``prompt`` and return the first decoded sequence.
-
-    ``length`` is used as ``max_length`` for generation, except that it is
-    raised to the prompt's own token count when it is smaller than that.
-    """
-    encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
-    prompt_token_count = len(encoded.tokens())
-    # Never ask for a max_length shorter than the prompt itself.
-    if length >= prompt_token_count:
-        max_length = length
-    else:
-        max_length = prompt_token_count
-    generated = model.generate(**encoded, max_length=max_length)
-    decoded = tokenizer.batch_decode(generated)
-    return decoded[0]
 demo = gr.Interface(
@@ -27,7 +61,7 @@ demo = gr.Interface(
             label="prompt",
             value="Write a detailed analogy between mathematics and a lighthouse.",
         ),
-        gr.Number(value=100, label="max length", maximum=500),
     ],
     outputs="text",
     examples=[
@@ -50,6 +84,9 @@ demo = gr.Interface(
    """\n''',
             100,
         ],
     ],
     title="Microsoft Phi-2",
     description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",

 import torch
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    TextIteratorStreamer,
+    StoppingCriteriaList,
+)
+from threading import Thread
 import gradio as gr
 if torch.cuda.is_available():
 )
+def Phi2StoppingCriteria(
+    input_ids: torch.LongTensor, score: torch.FloatTensor, **kwargs
+) -> bool:
+    """Return True once the generated sequence ends with a stop string.
+
+    Intended to be used as a callable entry of a ``StoppingCriteriaList``.
+
+    Args:
+        input_ids: Token ids produced so far, indexed as (batch, sequence).
+            Only the first row is inspected (assumes batch size 1 —
+            TODO confirm against callers).
+        score: Prediction scores supplied by the generation loop; unused.
+        **kwargs: Extra arguments forwarded by the generation loop; unused.
+
+    Returns:
+        True when the tail of the sequence equals the token ids of any stop
+        string, otherwise False.
+    """
+    stop_list = ["Exercise", "Exercises", "<|endoftext|>"]
+    # Compare plain Python ints: `tensor in [tensor, ...]` falls back to
+    # tensor `==`, which is ambiguous (or raises) for multi-element tensors,
+    # and `input_ids[-1]` is the last batch row rather than the last token.
+    generated = input_ids[0].tolist()
+    for stop in stop_list:
+        stop_ids = tokenizer(stop, add_special_tokens=False).input_ids
+        # A stop string may span several tokens, so match the whole tail.
+        # NOTE(review): a stop word preceded by a space may tokenize
+        # differently and would then not match — confirm if that matters.
+        if stop_ids and generated[-len(stop_ids):] == stop_ids:
+            return True
+    return False
+stopping_criteria = StoppingCriteriaList([Phi2StoppingCriteria])
+def generate(prompt, max_new_tokens):
+    """Stream a completion for ``prompt``, yielding the text accumulated so far.
+
+    Generation runs in a background thread; this generator consumes the
+    streamer and yields the growing output after every decoded chunk.
+
+    Args:
+        prompt: Text to feed to the model.
+        max_new_tokens: Upper bound on the number of tokens to generate.
+
+    Yields:
+        The concatenation of all text chunks received from the streamer
+        up to that point.
+    """
+    inputs = tokenizer(prompt, return_tensors="pt")
+    # thanks https://huggingface.co/spaces/joaogante/transformers_streaming/blob/main/app.py
+    # The streamer decodes token ids itself, so it must be given the
+    # tokenizer, not the encoded prompt; otherwise decoding fails in the
+    # worker thread and the loop below waits forever.
+    streamer = TextIteratorStreamer(tokenizer)
+    generation_kwargs = dict(
+        inputs,
+        streamer=streamer,
+        # Presumably the UI number field delivers a float; pass an int.
+        max_new_tokens=int(max_new_tokens),
+        do_sample=True,
+        stopping_criteria=stopping_criteria,
+    )
+    # Run generation in the background so tokens can be consumed here while
+    # they are still being produced.
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    model_output = ""
+    for new_text in streamer:
+        model_output += new_text
+        yield model_output
+    # The streamer is exhausted, so generation is finishing; reap the thread.
+    thread.join()
+    return model_output
 demo = gr.Interface(
             label="prompt",
             value="Write a detailed analogy between mathematics and a lighthouse.",
         ),
+        gr.Number(value=100, label="max new tokens", maximum=500),
     ],
     outputs="text",
     examples=[
    """\n''',
             100,
         ],
+        ["User: How does sleep affect mood?\nAI:", 125],
+        ["Who was Ada Lovelace?", 100],
+        ["Explain the concept of skip lists.", 125],
     ],
     title="Microsoft Phi-2",
     description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",