0x7o committed on
Commit 0ca8c6e
1 Parent(s): 96c39c1

Update app.py

Files changed (1): app.py +4 -9
app.py CHANGED
@@ -7,8 +7,8 @@ from threading import Thread
 
 # Loading the tokenizer and model from Hugging Face's model hub.
 if torch.cuda.is_available():
-    tokenizer = AutoTokenizer.from_pretrained("0x7194633/fialka-13B-v4")
-    model = AutoModelForCausalLM.from_pretrained("0x7194633/fialka-13B-v4", load_in_8bit=True, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")
+    model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0", torch_dtype=torch.float16, device_map="auto")
 
 
 # Defining a custom stopping criteria class for the model's text generation.
@@ -24,14 +24,9 @@ class StopOnTokens(StoppingCriteria):
 # Function to generate model predictions.
 @spaces.GPU
 def predict(message, history):
-    history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
-
-    # Formatting the input for the model.
-    messages = "<|system|>\nТы Фиалка - самый умный нейронный помощник, созданный 0x7o.</s>\n"
-    messages += "</s>".join(["</s>".join(["\n<|user|>" + item[0], "\n<|assistant|>" + item[1]])
-                             for item in history_transformer_format])
-    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
+    prompt = tokenizer.apply_chat_template(history + message, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
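
The substantive change is in predict(): the hand-rolled prompt string (the removed Russian system prompt reads "You are Fialka, the smartest neural assistant, created by 0x7o") is replaced by tokenizer.apply_chat_template. As committed, though, history + message concatenates Gradio's list-of-pairs history with a plain string, which raises a TypeError, and apply_chat_template expects a list of role/content dicts rather than pairs. A minimal sketch of the presumably intended conversion (the build_prompt helper is hypothetical, not part of the commit):

# Hypothetical helper (not in the commit): turn Gradio's
# [[user, assistant], ...] history plus the new user message into the
# role/content messages that tokenizer.apply_chat_template expects.
def build_prompt(tokenizer, message, history):
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt=True appends the assistant header so the
    # model generates a reply instead of continuing the user turn.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )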
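
The hunk is truncated right after generate_kwargs. For context, the usual TextIteratorStreamer pattern the surrounding file presumably follows is sketched below; the generation parameters are assumptions, not taken from the commit:

from threading import Thread
from transformers import StoppingCriteriaList

# Sketch of the streaming pattern: generation runs in a background
# thread while the streamer yields partial text to the Gradio UI.
generate_kwargs = dict(
    model_inputs,                                    # input_ids / attention_mask from the tokenizer
    streamer=streamer,
    max_new_tokens=1024,                             # hypothetical limit
    do_sample=True,                                  # hypothetical sampling setting
    stopping_criteria=StoppingCriteriaList([stop]),  # the StopOnTokens instance
)
Thread(target=model.generate, kwargs=generate_kwargs).start()

partial_message = ""
for new_text in streamer:
    partial_message += new_text
    yield partial_message

dict(model_inputs, ...) works because the tokenizer returns a dict-like BatchEncoding, so its tensors merge with the extra keyword arguments passed on to model.generate.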