Update app.py
app.py CHANGED
@@ -8,8 +8,8 @@ from threading import Thread
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
-device = "cuda"
-model.to(device)
+# device = "cuda"
+# model.to(device)
 def chat(message, history):
     prompt = [
         {"role": "system", "content": "You are a helpful assistant."},
@@ -27,7 +27,8 @@ def chat(message, history):
         add_generation_prompt=True
     )
 
-    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
+    # model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
+    model_inputs = tokenizer([text], return_tensors="pt")
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
        model_inputs,
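Note: rather than commenting out the CUDA lines, the same change can be made device-agnostic so the app runs on GPU when one is available and falls back to CPU otherwise. A minimal sketch (not part of this commit), assuming the standard torch/transformers APIs and reusing the app's model and prompt setup:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Select the device at startup instead of hard-coding "cuda";
# CPU fallback covers Spaces without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct").to(device)

prompt = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},  # placeholder user turn for illustration
]
text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

# Inputs must live on the same device as the model.
model_inputs = tokenizer([text], return_tensors="pt").to(device)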