Spaces:

theodotus
/

gemma-2b-uk

Running

theodotus committed on Mar 6

Commit

5429f80

•

1 Parent(s): df2b973

Added app

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from llama_cpp import Llama
+llm = Llama(
+    model_path="gemma-2b-uk.gguf",
+    chat_format="gemma"
+)
+def convert_history(message, history):
+    chat_history = []
+    for block in history[-1:]:
+        chat_history.append({
+            "role": "user",
+            "content": block[0]
+        })
+        chat_history.append({
+            "role": "model",
+            "content": block[1]
+        })
+    chat_history.append({
+        "role": "user",
+        "content": message
+    })
+    return chat_history
+def ask(message, history):
+    chat_history = convert_history(message, history)
+    chunks = llm.create_chat_completion(
+        messages = chat_history,
+        temperature = 0,
+        stream = True,
+        repeat_penalty = 1.05,
+    )
+    response = ""
+    for chunk in chunks:
+        delta = chunk["choices"][0]["delta"]
+        if "content" not in delta:
+            continue
+        response += delta["content"]
+        yield response
+demo = gr.ChatInterface(ask)
+if __name__ == "__main__":
+    demo.queue().launch()