Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
1a8bf1d
1
Parent(s):
f820383
Update cuda().half(), fix timezone
Browse files
app.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
# pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
|
2 |
# import gradio as gr
|
3 |
|
@@ -6,17 +9,24 @@
|
|
6 |
|
7 |
# %%writefile demo-4bit.py
|
8 |
|
|
|
|
|
9 |
from textwrap import dedent
|
10 |
|
11 |
import gradio as gr
|
12 |
import mdtex2html
|
13 |
import torch
|
14 |
from loguru import logger
|
15 |
-
|
16 |
-
# credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
|
17 |
-
# while mistakes are mine
|
18 |
from transformers import AutoModel, AutoTokenizer
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
model_name = "THUDM/chatglm2-6b"
|
21 |
# model_name = "THUDM/chatglm2-6b-int4"
|
22 |
|
@@ -33,9 +43,9 @@ has_cuda = torch.cuda.is_available()
|
|
33 |
# has_cuda = False # force cpu
|
34 |
|
35 |
if has_cuda:
|
36 |
-
model =
|
37 |
-
model_name, trust_remote_code=True
|
38 |
-
)
|
39 |
else:
|
40 |
model = AutoModel.from_pretrained(
|
41 |
model_name, trust_remote_code=True
|
@@ -179,7 +189,7 @@ def retry_last_answer(
|
|
179 |
history.pop(-1)
|
180 |
|
181 |
yield from predict(
|
182 |
-
RETRY_FLAG,
|
183 |
user_input,
|
184 |
chatbot,
|
185 |
max_length,
|
@@ -196,7 +206,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) a
|
|
196 |
"""<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
|
197 |
)
|
198 |
|
199 |
-
with gr.Accordion("Info", open=False):
|
200 |
_ = """
|
201 |
## ChatGLM2-6B-int4
|
202 |
|
|
|
1 |
+
"""Credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
|
2 |
+
while mistakes are mine
|
3 |
+
"""
|
4 |
# pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
|
5 |
# import gradio as gr
|
6 |
|
|
|
9 |
|
10 |
# %%writefile demo-4bit.py
|
11 |
|
12 |
+
import os
|
13 |
+
import time
|
14 |
from textwrap import dedent
|
15 |
|
16 |
import gradio as gr
|
17 |
import mdtex2html
|
18 |
import torch
|
19 |
from loguru import logger
|
|
|
|
|
|
|
20 |
from transformers import AutoModel, AutoTokenizer
|
21 |
|
22 |
+
# fix timezone in Linux
|
23 |
+
os.environ["TZ"] = "Asia/Shanghai"
|
24 |
+
try:
|
25 |
+
time.tzset() # type: ignore # pylint: disable=no-member
|
26 |
+
except Exception:
|
27 |
+
# Windows
|
28 |
+
logger.warning("Windows, cant run time.tzset()")
|
29 |
+
|
30 |
model_name = "THUDM/chatglm2-6b"
|
31 |
# model_name = "THUDM/chatglm2-6b-int4"
|
32 |
|
|
|
43 |
# has_cuda = False # force cpu
|
44 |
|
45 |
if has_cuda:
|
46 |
+
model = (
|
47 |
+
AutoModel.from_pretrained(model_name, trust_remote_code=True).cuda().half()
|
48 |
+
) # 3.92G
|
49 |
else:
|
50 |
model = AutoModel.from_pretrained(
|
51 |
model_name, trust_remote_code=True
|
|
|
189 |
history.pop(-1)
|
190 |
|
191 |
yield from predict(
|
192 |
+
RETRY_FLAG, # type: ignore
|
193 |
user_input,
|
194 |
chatbot,
|
195 |
max_length,
|
|
|
206 |
"""<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
|
207 |
)
|
208 |
|
209 |
+
with gr.Accordion("🎈 Info", open=False):
|
210 |
_ = """
|
211 |
## ChatGLM2-6B-int4
|
212 |
|