IlyaGusev committed
Commit c2d4494
1 Parent(s): 7183164

Update app.py

Files changed (1)
  1. app.py +18 -36
app.py CHANGED
@@ -9,21 +9,10 @@ from llama_cpp import Llama
 SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
 
 
-def get_message_tokens(model, role, content):
-    content = f"{role}\n{content}\n</s>"
-    content = content.encode("utf-8")
-    return model.tokenize(content, special=True)
-
-
-def get_system_tokens(model):
-    system_message = {"role": "system", "content": SYSTEM_PROMPT}
-    return get_message_tokens(model, **system_message)
-
-
 def load_model(
     directory: str = ".",
-    model_name: str = "model-q4_K.gguf",
-    model_url: str = "https://huggingface.co/IlyaGusev/saiga2_13b_gguf/resolve/main/model-q4_K.gguf"
+    model_name: str = "saiga_nemo_12b.Q4_K_M.gguf",
+    model_url: str = "https://huggingface.co/IlyaGusev/saiga_nemo_12b_gguf/resolve/main/saiga_nemo_12b.Q4_K_M.gguf"
 ):
     final_model_path = os.path.join(directory, model_name)
 
@@ -36,7 +25,7 @@ def load_model(
 
     model = Llama(
         model_path=final_model_path,
-        n_ctx=1024
+        n_ctx=8192
     )
 
     print("Model loaded!")
@@ -59,35 +48,28 @@ def bot(
     temp
 ):
     model = MODEL
-    tokens = get_system_tokens(model)[:]
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
     for user_message, bot_message in history[:-1]:
-        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
-        tokens.extend(message_tokens)
+        messages.append({"role": "user", "content": user_message})
         if bot_message:
-            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
-            tokens.extend(message_tokens)
+            messages.append({"role": "assistant", "content": bot_message})
 
     last_user_message = history[-1][0]
-    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
-    tokens.extend(message_tokens)
-
-    role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
-    tokens.extend(role_tokens)
-    generator = model.generate(
-        tokens,
+    messages.append({"role": "user", "content": last_user_message})
+    partial_text = ""
+    for part in model.create_chat_completion(
+        messages,
+        temperature=temp,
         top_k=top_k,
         top_p=top_p,
-        temp=temp
-    )
-
-    partial_text = ""
-    for i, token in enumerate(generator):
-        if token == model.token_eos():
-            break
-        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
-        history[-1][1] = partial_text
-        yield history
+        stream=True,
+    ):
+        delta = part["choices"][0]["delta"]
+        if "content" in delta:
+            partial_text += delta["content"]
+        history[-1][1] = partial_text
+        yield history
 
 
 with gr.Blocks(
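A note on what changed in bot(): the old code built Saiga's prompt format by hand (get_message_tokens / get_system_tokens, then model.generate over raw token ids), while the new code passes an OpenAI-style messages list to create_chat_completion and lets llama-cpp-python apply the chat template shipped in the GGUF metadata, which is why the helper functions could be deleted. Below is a minimal standalone sketch of the new streaming loop; the model file name and n_ctx come from this commit, while the prompts and temperature are placeholder values:

```python
# Minimal sketch of the streaming path used by the new bot() function.
# Assumes llama-cpp-python is installed and the GGUF file from the commit's
# model_url has already been downloaded into the working directory.
from llama_cpp import Llama

llm = Llama(model_path="saiga_nemo_12b.Q4_K_M.gguf", n_ctx=8192)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},  # placeholder
    {"role": "user", "content": "Привет!"},                         # placeholder
]

# With stream=True, create_chat_completion yields OpenAI-style chunks;
# each chunk's "delta" dict may or may not contain a piece of text.
partial_text = ""
for part in llm.create_chat_completion(messages, temperature=0.6, stream=True):
    delta = part["choices"][0]["delta"]
    if "content" in delta:
        partial_text += delta["content"]
        print(delta["content"], end="", flush=True)
```

Note that the explicit stop on token_eos() is also gone: create_chat_completion handles end-of-sequence itself, so the loop simply drains the stream.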
 
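The download step inside load_model (between final_model_path and the Llama(...) call) is unchanged and not shown in this diff. For orientation only, here is a hypothetical sketch of what the model_url parameter implies; ensure_model and its log message are illustrative names, not from the original file:

```python
# Hypothetical sketch of the download step implied by load_model's
# model_url parameter; the real implementation is not shown in this diff.
import os
import urllib.request

def ensure_model(directory: str, model_name: str, model_url: str) -> str:
    final_model_path = os.path.join(directory, model_name)
    if not os.path.exists(final_model_path):
        print(f"Downloading {model_url} ...")  # illustrative logging
        urllib.request.urlretrieve(model_url, final_model_path)  # stdlib fetch
    return final_model_path
```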