Spaces: Runtime error

Update app.py
app.py
CHANGED
@@ -7,8 +7,8 @@ from threading import Thread
 
 # Loading the tokenizer and model from Hugging Face's model hub.
 if torch.cuda.is_available():
-    tokenizer = AutoTokenizer.from_pretrained("
-    model = AutoModelForCausalLM.from_pretrained("
+    tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")
+    model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0", torch_dtype=torch.float16, device_map="auto")
 
 
 # Defining a custom stopping criteria class for the model's text generation.
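The updated loader pins the Space to upstage/SOLAR-10.7B-Instruct-v1.0 in float16 (roughly 21 GB of weights at 2 bytes per parameter) and lets accelerate place the layers via device_map="auto". Note that both assignments still sit under `if torch.cuda.is_available():` with no else branch, so on CPU-only hardware `predict()` would hit a NameError. A minimal sketch of a CPU fallback, which is an assumption and not part of this commit:

    if torch.cuda.is_available():
        tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")
        model = AutoModelForCausalLM.from_pretrained(
            "upstage/SOLAR-10.7B-Instruct-v1.0",
            torch_dtype=torch.float16,  # half-precision weights to fit GPU memory
            device_map="auto",          # let accelerate assign layers to devices
        )
    else:
        # Hypothetical fallback, not in the commit: load on CPU in full precision
        # (slow for a 10.7B model, but predict() no longer hits a NameError).
        tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")
        model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")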
@@ -24,14 +24,9 @@ class StopOnTokens(StoppingCriteria):
 # Function to generate model predictions.
 @spaces.GPU
 def predict(message, history):
-    history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
-
-
-    messages = "<|system|>\nТы Фиалка - самый умный нейронный помощник, созданный 0x7o.</s>\n"
-    messages += "</s>".join(["</s>".join(["\n<|user|>" + item[0], "\n<|assistant|>" + item[1]])
-                             for item in history_transformer_format])
-    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
+    prompt = tokenizer.apply_chat_template(history + message, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
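The hand-rolled `<|system|>`/`<|user|>` prompt string (system text: "You are Fialka - the smartest neural assistant, created by 0x7o.") is replaced with the tokenizer's built-in chat template. One caveat: `apply_chat_template` expects a list of {"role", "content"} dicts, while a Gradio tuples-format `history` is a list of [user, assistant] pairs and `message` is a plain string, so `history + message` raises a TypeError before the template is ever applied, which plausibly explains the Space's runtime-error status. A sketch of the conversion the call likely needs (`build_messages` is a hypothetical helper, not in the commit):

    def build_messages(message, history):
        # Assumes tuples-format Gradio history: [[user_text, assistant_text], ...]
        messages = []
        for user_turn, assistant_turn in history:
            messages.append({"role": "user", "content": user_turn})
            messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})
        return messages

    prompt = tokenizer.apply_chat_template(
        build_messages(message, history),
        tokenize=False,
        add_generation_prompt=True,  # append the assistant header so the model continues
    )

If the model's chat template accepts a system role, the old Fialka system prompt could be reinstated as a leading {"role": "system", ...} entry; otherwise it can be prepended to the first user turn.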
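The diff is truncated at `generate_kwargs = dict(model_inputs,`. For context, the usual TextIteratorStreamer pattern this tail follows in Transformers chat demos looks like the sketch below; the specific values (`max_new_tokens`, the sampling flag) are assumptions, not read from the commit, and `StoppingCriteriaList` is assumed to be imported alongside `StoppingCriteria`:

    generate_kwargs = dict(
        model_inputs,                # BatchEncoding unpacks like a dict of tensors
        streamer=streamer,
        max_new_tokens=1024,         # assumed limit, not visible in the diff
        do_sample=True,              # assumed sampling setup
        stopping_criteria=StoppingCriteriaList([stop]),
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)  # Thread imported at top
    t.start()

    partial_message = ""
    for new_token in streamer:       # blocks up to the 10 s timeout set on the streamer
        partial_message += new_token
        yield partial_message        # stream the growing reply back to Gradio

Running model.generate on a background thread while the generator yields from the streamer is what lets Gradio render tokens as they arrive instead of waiting for the full completion.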