Spaces:
Sleeping
Sleeping
AFischer1985
committed on
Commit
•
62a7f24
1
Parent(s):
dcdb53b
Update run.py
Browse files
run.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
##########################################################################################
|
7 |
|
8 |
|
9 |
-
# Get model
|
10 |
#-----------
|
11 |
|
12 |
import os
|
@@ -16,23 +16,27 @@ dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
|
|
16 |
if(os.path.exists(dbPath)==False):
|
17 |
dbPath="/home/user/app/db"
|
18 |
|
|
|
|
|
19 |
#modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
|
20 |
modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
|
21 |
if(os.path.exists(modelPath)==False):
|
22 |
-
url="https://huggingface.co/TheBloke/
|
23 |
response = requests.get(url)
|
24 |
with open("./model.gguf", mode="wb") as file:
|
25 |
file.write(response.content)
|
26 |
print("Model downloaded")
|
27 |
modelPath="./model.gguf"
|
28 |
|
|
|
|
|
29 |
|
30 |
# Llama-cpp-Server
|
31 |
#------------------
|
32 |
|
33 |
command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600"]
|
34 |
subprocess.Popen(command)
|
35 |
-
print("
|
36 |
|
37 |
|
38 |
# Chroma-DB
|
@@ -68,6 +72,7 @@ else:
|
|
68 |
ids=["doc1", "doc2", "doc3"],
|
69 |
)
|
70 |
|
|
|
71 |
print(collection.count())
|
72 |
|
73 |
|
@@ -76,7 +81,6 @@ print(collection.count())
|
|
76 |
|
77 |
import gradio as gr
|
78 |
import requests
|
79 |
-
import random
|
80 |
import json
|
81 |
def response(message, history):
|
82 |
addon=""
|
@@ -101,10 +105,12 @@ def response(message, history):
|
|
101 |
print(str(body))
|
102 |
print("User: "+message+"\nAI: ")
|
103 |
for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
|
104 |
-
|
|
|
|
|
105 |
text=text.decode('utf-8')
|
106 |
-
if(text.startswith(": ping -")==False):buffer=
|
107 |
-
print("\n*** Buffer: "+str(buffer)+"\n***\n")
|
108 |
buffer=buffer.split('"finish_reason": null}]}')
|
109 |
if(len(buffer)==1):
|
110 |
buffer="".join(buffer)
|
@@ -117,10 +123,10 @@ def response(message, history):
|
|
117 |
print(part, end="", flush=True)
|
118 |
response=response+part
|
119 |
buffer="" # reset buffer
|
120 |
-
except:
|
|
|
121 |
pass
|
122 |
yield response
|
123 |
|
124 |
-
gr.ChatInterface(response).queue().launch(share=
|
125 |
-
|
126 |
-
|
|
|
6 |
##########################################################################################
|
7 |
|
8 |
|
9 |
+
# Get model
|
10 |
#-----------
|
11 |
|
12 |
import os
|
|
|
16 |
if(os.path.exists(dbPath)==False):
|
17 |
dbPath="/home/user/app/db"
|
18 |
|
19 |
+
print(dbPath)
|
20 |
+
|
21 |
#modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
|
22 |
modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
|
23 |
if(os.path.exists(modelPath)==False):
|
24 |
+
url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
|
25 |
response = requests.get(url)
|
26 |
with open("./model.gguf", mode="wb") as file:
|
27 |
file.write(response.content)
|
28 |
print("Model downloaded")
|
29 |
modelPath="./model.gguf"
|
30 |
|
31 |
+
print(modelPath)
|
32 |
+
|
33 |
|
34 |
# Llama-cpp-Server
|
35 |
#------------------
|
36 |
|
37 |
command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600"]
|
38 |
subprocess.Popen(command)
|
39 |
+
print("Server ready!")
|
40 |
|
41 |
|
42 |
# Chroma-DB
|
|
|
72 |
ids=["doc1", "doc2", "doc3"],
|
73 |
)
|
74 |
|
75 |
+
print("Database ready!")
|
76 |
print(collection.count())
|
77 |
|
78 |
|
|
|
81 |
|
82 |
import gradio as gr
|
83 |
import requests
|
|
|
84 |
import json
|
85 |
def response(message, history):
|
86 |
addon=""
|
|
|
105 |
print(str(body))
|
106 |
print("User: "+message+"\nAI: ")
|
107 |
for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
|
108 |
+
if buffer is None: buffer=""
|
109 |
+
buffer=str("".join(buffer))
|
110 |
+
#print("*** Raw String: "+str(text)+"\n***\n")
|
111 |
text=text.decode('utf-8')
|
112 |
+
if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
|
113 |
+
#print("\n*** Buffer: "+str(buffer)+"\n***\n")
|
114 |
buffer=buffer.split('"finish_reason": null}]}')
|
115 |
if(len(buffer)==1):
|
116 |
buffer="".join(buffer)
|
|
|
123 |
print(part, end="", flush=True)
|
124 |
response=response+part
|
125 |
buffer="" # reset buffer
|
126 |
+
except Exception as e:
|
127 |
+
print("Exception:"+str(e))
|
128 |
pass
|
129 |
yield response
|
130 |
|
131 |
+
gr.ChatInterface(response).queue().launch(share=False, server_name="0.0.0.0", server_port=7864)
|
132 |
+
print("Interface up and running!")
|
|