alonsosilva committed
Commit 963666a • 1 Parent(s): 95f8fb3
Add app
Files changed:
- Dockerfile +28 -0
- LICENSE +21 -0
- app.py +195 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,28 @@
+FROM python:3.11
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Run pip only after switching with `USER user` to avoid permission issues with Python
+RUN pip install --no-cache-dir --upgrade pip
+
+# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+COPY --chown=user . $HOME/app
+
+COPY --chown=user requirements.txt .
+
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user app.py app.py
+
+ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
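The Dockerfile follows the usual Hugging Face Spaces Docker pattern: a non-root "user" (UID 1000) owns the app directory, pip installs land under ~/.local (which is added to PATH), and the app serves on port 7860, the port Spaces expects. As a local smoke test (the image tag here is illustrative, not part of the commit), something like `docker build -t qwen2-chat .` followed by `docker run -p 7860:7860 qwen2-chat` should serve the same app at http://localhost:7860.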
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Alonso Silva Allende
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
app.py
ADDED
@@ -0,0 +1,195 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+
+class BaseStreamer:
+    """
+    Base class from which `.generate()` streamers should inherit.
+    """
+
+    def put(self, value):
+        """Function that is called by `.generate()` to push new tokens"""
+        raise NotImplementedError()
+
+    def end(self):
+        """Function that is called by `.generate()` to signal the end of generation"""
+        raise NotImplementedError()
+
+class TextStreamer(BaseStreamer):
+    """
+    Simple text streamer that prints the token(s) to stdout as soon as entire words are formed.
+
+    <Tip warning={true}>
+
+    The API for the streamer classes is still under development and may change in the future.
+
+    </Tip>
+
+    Parameters:
+        tokenizer (`AutoTokenizer`):
+            The tokenizer used to decode the tokens.
+        skip_prompt (`bool`, *optional*, defaults to `False`):
+            Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
+        decode_kwargs (`dict`, *optional*):
+            Additional keyword arguments to pass to the tokenizer's `decode` method.
+
+    Examples:
+
+        ```python
+        >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+
+        >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
+        >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
+        >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
+        >>> streamer = TextStreamer(tok)
+
+        >>> # Despite returning the usual output, the streamer will also print the generated text to stdout.
+        >>> _ = model.generate(**inputs, streamer=streamer, max_new_tokens=20)
+        An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
+        ```
+    """
+
+    def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs):
+        self.tokenizer = tokenizer
+        self.skip_prompt = skip_prompt
+        self.decode_kwargs = decode_kwargs
+
+        # variables used in the streaming process
+        self.token_cache = []
+        self.print_len = 0
+        self.next_tokens_are_prompt = True
+
+    def put(self, value):
+        """
+        Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
+        """
+        if len(value.shape) > 1 and value.shape[0] > 1:
+            raise ValueError("TextStreamer only supports batch size 1")
+        elif len(value.shape) > 1:
+            value = value[0]
+
+        if self.skip_prompt and self.next_tokens_are_prompt:
+            self.next_tokens_are_prompt = False
+            return
+
+        # Add the new token to the cache and decode the entire thing.
+        self.token_cache.extend(value.tolist())
+        text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
+
+        # After the symbol for a new line, we flush the cache.
+        if text.endswith("\n"):
+            printable_text = text[self.print_len :]
+            self.token_cache = []
+            self.print_len = 0
+        # If the last token is a CJK character, we print the characters.
+        elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
+            printable_text = text[self.print_len :]
+            self.print_len += len(printable_text)
+        # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
+        # which may change with the subsequent token -- there are probably smarter ways to do this!)
+        else:
+            printable_text = text[self.print_len : text.rfind(" ") + 1]
+            self.print_len += len(printable_text)
+
+        self.on_finalized_text(printable_text)
+
+    def end(self):
+        """Flushes any remaining cache and signals the end of the stream."""
+        # Flush the cache, if it exists
+        if len(self.token_cache) > 0:
+            text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
+            printable_text = text[self.print_len :]
+            self.token_cache = []
+            self.print_len = 0
+        else:
+            printable_text = ""
+
+        self.next_tokens_are_prompt = True
+        self.on_finalized_text(printable_text, stream_end=True)
+
+    def on_finalized_text(self, text: str, stream_end: bool = False):
+        """Appends the new text to the last (assistant) message in the reactive message list."""
+        # print(text, flush=True, end="" if not stream_end else None)
+        messages.value = [
+            *messages.value[:-1],
+            {
+                "role": "assistant",
+                "content": messages.value[-1]["content"] + text,
+            },
+        ]
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and are handled
+        # like all of the other languages.
+        if (
+            (cp >= 0x4E00 and cp <= 0x9FFF)
+            or (cp >= 0x3400 and cp <= 0x4DBF)
+            or (cp >= 0x20000 and cp <= 0x2A6DF)
+            or (cp >= 0x2A700 and cp <= 0x2B73F)
+            or (cp >= 0x2B740 and cp <= 0x2B81F)
+            or (cp >= 0x2B820 and cp <= 0x2CEAF)
+            or (cp >= 0xF900 and cp <= 0xFAFF)
+            or (cp >= 0x2F800 and cp <= 0x2FA1F)
+        ):
+            return True
+
+        return False
+
+streamer = TextStreamer(tokenizer, skip_prompt=True)
+
+import re
+import solara
+from typing import List
+from typing_extensions import TypedDict
+
+class MessageDict(TypedDict):
+    role: str
+    content: str
+
+messages: solara.Reactive[List[MessageDict]] = solara.reactive([])
+@solara.component
+def Page():
+    solara.lab.theme.themes.light.primary = "#0000ff"
+    solara.lab.theme.themes.light.secondary = "#0000ff"
+    solara.lab.theme.themes.dark.primary = "#0000ff"
+    solara.lab.theme.themes.dark.secondary = "#0000ff"
+    title = "Qwen2-0.5B"
+    with solara.Head():
+        solara.Title(f"{title}")
+    with solara.Column(align="center"):
+        user_message_count = len([m for m in messages.value if m["role"] == "user"])
+        def send(message):
+            messages.value = [*messages.value, {"role": "user", "content": message}]
+        def response(message):
+            messages.value = [*messages.value, {"role": "assistant", "content": ""}]
+            text = tokenizer.apply_chat_template(
+                [{"role": "user", "content": message}],
+                tokenize=False,
+                add_generation_prompt=True
+            )
+            inputs = tokenizer(text, return_tensors="pt")
+            _ = model.generate(**inputs, streamer=streamer, max_new_tokens=512)
+        def result():
+            if messages.value != []:
+                response(messages.value[-1]["content"])
+        result = solara.lab.use_task(result, dependencies=[user_message_count])
+        with solara.lab.ChatBox(style={"position": "fixed", "overflow-y": "scroll", "scrollbar-width": "none", "-ms-overflow-style": "none", "top": "0", "bottom": "10rem", "width": "70%"}):
+            for item in messages.value:
+                with solara.lab.ChatMessage(
+                    user=item["role"] == "user",
+                    name="User" if item["role"] == "user" else "Qwen2-0.5B-Instruct",
+                    avatar_background_color="#33cccc" if item["role"] == "assistant" else "#ff991f",
+                    border_radius="20px",
+                    style="background-color:lightgrey!important;"
+                ):
+                    item["content"] = re.sub(r'<\|im_end\|>', '', item["content"])
+                    solara.Markdown(item["content"])
+        solara.lab.ChatInput(send_callback=send, style={"position": "fixed", "bottom": "3rem", "width": "70%"})
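A note on the data flow: send() appends the user message, which changes user_message_count and re-triggers the use_task callback in a worker; that calls model.generate(..., streamer=streamer), and each decoded chunk lands in on_finalized_text, which rebuilds messages.value so Solara re-renders the ChatBox. For reference, here is a minimal sketch of the streamer contract that .generate() relies on: put() is called once with the prompt ids, then once per newly sampled token, and end() once generation finishes. The PrintStreamer name and its stdout behavior are illustrative, not part of this commit, and the sketch assumes duck typing suffices, since .generate() only ever calls put() and end().

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
    lm = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

    class PrintStreamer:
        """Illustrative streamer: emits each token to stdout as it is sampled."""

        def __init__(self, tokenizer):
            self.tokenizer = tokenizer
            self.prompt_seen = False  # the first put() carries the prompt ids

        def put(self, value):
            if not self.prompt_seen:  # mirror skip_prompt=True: don't echo the prompt
                self.prompt_seen = True
                return
            # value holds the newly sampled token id(s) for a batch of 1
            print(self.tokenizer.decode(value.tolist()), end="", flush=True)

        def end(self):
            print(flush=True)

    inputs = tok("An increasing sequence: one,", return_tensors="pt")
    _ = lm.generate(**inputs, streamer=PrintStreamer(tok), max_new_tokens=20)

Unlike this sketch, the app's TextStreamer buffers token ids and only emits text up to the last space (or a newline/CJK boundary), so partially decoded words do not flicker in the chat UI.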
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+torch==2.3.1
+transformers==4.41.2
+solara==1.33.0
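The pins match the app: solara 1.33.0 provides the solara.lab chat components used above, and transformers 4.41.2 supports Qwen2. Outside Docker, `pip install -r requirements.txt` followed by `solara run app.py` (the ENTRYPOINT's command, minus the host/port flags) should reproduce the Space locally.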