Spaces:
Sleeping
Sleeping
Benjamin Gonzalez
committed on
Commit
·
c4f947a
1
Parent(s):
62a0c90
try to implement streaming
Browse files
app.py
CHANGED
@@ -1,5 +1,11 @@
|
|
1 |
import torch
|
2 |
-
from transformers import
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import gradio as gr
|
4 |
|
5 |
if torch.cuda.is_available():
|
@@ -13,11 +19,39 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
13 |
)
|
14 |
|
15 |
|
16 |
-
def
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
demo = gr.Interface(
|
@@ -27,7 +61,7 @@ demo = gr.Interface(
|
|
27 |
label="prompt",
|
28 |
value="Write a detailed analogy between mathematics and a lighthouse.",
|
29 |
),
|
30 |
-
gr.Number(value=100, label="max
|
31 |
],
|
32 |
outputs="text",
|
33 |
examples=[
|
@@ -50,6 +84,9 @@ demo = gr.Interface(
|
|
50 |
"""\n''',
|
51 |
100,
|
52 |
],
|
|
|
|
|
|
|
53 |
],
|
54 |
title="Microsoft Phi-2",
|
55 |
description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",
|
|
|
1 |
import torch
|
2 |
+
from transformers import (
|
3 |
+
AutoTokenizer,
|
4 |
+
AutoModelForCausalLM,
|
5 |
+
TextIteratorStreamer,
|
6 |
+
StoppingCriteriaList,
|
7 |
+
)
|
8 |
+
from threading import Thread
|
9 |
import gradio as gr
|
10 |
|
11 |
if torch.cuda.is_available():
|
|
|
19 |
)
|
20 |
|
21 |
|
22 |
+
def Phi2StoppingCriteria(
    input_ids: torch.LongTensor, score: torch.FloatTensor, **kwargs
) -> bool:
    """Report whether generation should halt because a stop string was just emitted.

    Args:
        input_ids: Token ids produced so far. Indexed as ``input_ids[0]``, so a
            2-D ``(batch, sequence)`` tensor is assumed and only the first row
            is inspected -- TODO confirm the app never generates with batch > 1.
        score: Unused; accepted so the function can be called like a stopping
            criterion.
        **kwargs: Unused.

    Returns:
        True when the inspected sequence ends with the token ids of any entry
        in the stop list, False otherwise.
    """
    stop_list = ["Exercise", "Exercises", "<|endoftext|>"]
    # Work with plain Python ints: comparing tensors of different shapes with
    # ``in`` does not test "ends with" and can raise on ambiguous truth values.
    sequence = input_ids[0].tolist()
    for stop in stop_list:
        # No return_tensors here, so this is a flat list of token ids.
        stop_ids = tokenizer(stop, add_special_tokens=False).input_ids
        # A stop string may span several tokens, so compare the whole tail.
        # NOTE(review): a stop word preceded by a space may tokenize
        # differently and would not match -- verify against the tokenizer.
        if stop_ids and sequence[-len(stop_ids):] == stop_ids:
            return True
    return False
|
32 |
+
|
33 |
+
|
34 |
+
# Bundle the stop check into the list object that `generate` forwards to
# `model.generate` through its `stopping_criteria` keyword argument.
stopping_criteria = StoppingCriteriaList([Phi2StoppingCriteria])
|
35 |
+
|
36 |
+
|
37 |
+
def generate(prompt, max_new_tokens):
    """Stream a sampled completion for ``prompt``, yielding the text so far.

    Generation runs on a background thread while this generator drains the
    streamer, so each yielded value is the accumulated output up to that point.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on generated tokens. Coerced to ``int``
            because the UI's number input may deliver a float.

    Yields:
        The accumulated text after each chunk received from the streamer.

    Returns:
        The final accumulated text (as the generator's return value).
    """
    # Put the encoded prompt on the same device as the model's weights.
    # NOTE(review): assumes `model.device` is where inputs belong -- confirm if
    # the model is sharded across devices.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # thanks https://huggingface.co/spaces/joaogante/transformers_streaming/blob/main/app.py
    # The streamer decodes token ids back to text, so it needs the tokenizer,
    # not the encoded inputs.
    streamer = TextIteratorStreamer(tokenizer)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        stopping_criteria=stopping_criteria,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
    # The streamer is exhausted once generation ends; reap the worker thread.
    thread.join()
    return model_output
|
55 |
|
56 |
|
57 |
demo = gr.Interface(
|
|
|
61 |
label="prompt",
|
62 |
value="Write a detailed analogy between mathematics and a lighthouse.",
|
63 |
),
|
64 |
+
gr.Number(value=100, label="max new tokens", maximum=500),
|
65 |
],
|
66 |
outputs="text",
|
67 |
examples=[
|
|
|
84 |
"""\n''',
|
85 |
100,
|
86 |
],
|
87 |
+
["User: How does sleep affect mood?\nAI:", 125],
|
88 |
+
["Who was Ada Lovelace?", 100],
|
89 |
+
["Explain the concept of skip lists.", 125],
|
90 |
],
|
91 |
title="Microsoft Phi-2",
|
92 |
description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",
|