theodotus committed on
Commit
5429f80
1 Parent(s): df2b973
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+
4
+
5
+
6
+
7
# Load the Ukrainian Gemma-2B model from a local GGUF file and use
# llama-cpp-python's built-in "gemma" chat template so message dicts are
# formatted into Gemma's prompt syntax automatically.
llm = Llama(
    model_path="gemma-2b-uk.gguf",
    chat_format="gemma"
)
11
+
12
+
13
+
14
+
15
def convert_history(message, history, max_turns=1):
    """Convert a Gradio chat history into llama-cpp chat messages.

    Args:
        message: The new user message, appended last.
        history: Gradio-style history — a list of
            ``[user_text, model_text]`` pairs.
        max_turns: Number of most-recent exchanges to forward as context.
            Defaults to 1, preserving the original behaviour of sending
            only the last exchange (keeps the prompt short for a 2B model).

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts ending with the
        new user message.
    """
    chat_history = []
    # Forward only the most recent `max_turns` exchanges; `max_turns=0`
    # must yield no context (a plain `history[-0:]` would return everything).
    recent = history[-max_turns:] if max_turns > 0 else []
    for user_text, model_text in recent:
        chat_history.append({
            "role": "user",
            "content": user_text
        })
        # NOTE(review): "model" is Gemma's native role name; confirm the
        # llama-cpp "gemma" chat handler accepts it (many handlers expect
        # "assistant").
        chat_history.append({
            "role": "model",
            "content": model_text
        })
    chat_history.append({
        "role": "user",
        "content": message
    })
    return chat_history
31
+
32
+
33
def ask(message, history):
    """Stream a model reply for *message*, yielding the growing response.

    Gradio calls this with the new message and the prior chat history;
    each yield replaces the displayed partial answer, producing a
    token-by-token streaming effect in the UI.
    """
    messages = convert_history(message, history)
    stream = llm.create_chat_completion(
        messages=messages,
        temperature=0,
        stream=True,
        repeat_penalty=1.05,
    )

    partial = ""
    for event in stream:
        delta = event["choices"][0]["delta"]
        # Some streamed chunks (e.g. the role announcement) carry no text.
        if "content" in delta:
            partial += delta["content"]
            yield partial
49
+
50
+
51
+
52
+
53
# Wire the streaming generator into Gradio's standard chat UI.
demo = gr.ChatInterface(ask)

if __name__ == "__main__":
    # queue() is required for generator-based (streamed) responses in Gradio.
    demo.queue().launch()