theodotus committed
Commit
aa49098
1 Parent(s): 1db0c3f

Use zephyr chat format

Files changed (1)
  1. app.py +6 -19
app.py CHANGED
@@ -6,7 +6,6 @@ from llama_cpp import Llama
 
 llm = Llama(
     model_path="gemma-2b-uk.gguf",
-    chat_format="gemma",
     n_threads=2,
     n_threads_batch=2,
 )
@@ -15,20 +14,10 @@ llm = Llama(
 
 
 def convert_history(message, history):
-    chat_history = []
+    chat_history = ""
     for block in history[-1:]:
-        chat_history.append({
-            "role": "user",
-            "content": block[0]
-        })
-        chat_history.append({
-            "role": "model",
-            "content": block[1]
-        })
-    chat_history.append({
-        "role": "user",
-        "content": message
-    })
+        chat_history += f"<|user|>\n{block[0]}<eos>\n<|assistant|>\n{block[1]}<eos>\n"
+    chat_history += f"<|user|>\n{message}<eos>\n<|assistant|>\n"
     return chat_history
 
 
@@ -45,11 +34,9 @@ def ask(message, history):
 
     response = ""
     for chunk in chunks:
-        delta = chunk["choices"][0]["delta"]
-        if "content" not in delta:
-            continue
-        print(delta["content"])
-        response += delta["content"]
+        delta = chunk["choices"][0]["text"]
+        print(delta)
+        response += delta
         yield response
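For one stored exchange, the rewritten convert_history now returns a flat zephyr-tagged prompt string instead of a list of role dicts. With history = [["Hello", "Hi there!"]] and message = "How are you?" (illustrative values, not from the commit), the returned string renders as:

<|user|>
Hello<eos>
<|assistant|>
Hi there!<eos>
<|user|>
How are you?<eos>
<|assistant|>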
 
 
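With chat_format gone from the Llama constructor, ask presumably passes this prompt to a plain streamed completion call, whose chunks expose the generated text under choices[0]["text"], the access this commit switches to. A minimal sketch of the surrounding call, assuming llama-cpp-python's completion API; max_tokens, the stop list, and the yield wiring are illustrative assumptions, not part of the diff:

def ask(message, history):
    prompt = convert_history(message, history)
    # Raw completion: without chat_format, the model receives the
    # zephyr-tagged prompt verbatim.
    chunks = llm(
        prompt,
        max_tokens=256,   # assumed value, not shown in the diff
        stop=["<eos>"],   # assumed: cut generation at the end-of-turn tag
        stream=True,
    )
    response = ""
    for chunk in chunks:
        # Streamed completion chunks carry text at choices[0]["text"],
        # matching the access introduced by this commit.
        delta = chunk["choices"][0]["text"]
        print(delta)
        response += delta
        yield response  # accumulate so a streaming UI can redraw the reply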