Spaces:
Sleeping
Sleeping
toaster61
committed on
Commit
·
559ea97
1
Parent(s):
be3d3fd
not working commit
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
|
4 |
app = Quart(__name__)
|
5 |
|
@@ -8,19 +8,20 @@ with open('system.prompt', 'r', encoding='utf-8') as f:
|
|
8 |
|
9 |
@app.post("/request")
|
10 |
async def echo():
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
output_ids = model.generate(
|
17 |
-
input_ids=input_ids,
|
18 |
-
do_sample=random.choice([True, False]), temperature=float(random.randint(7,20)) / 10.0,
|
19 |
-
max_new_tokens=data.get("max_tokens") or random.randomint(200,500),
|
20 |
-
eos_token_id=tokenizer.eos_token_id, return_full_text = False)
|
21 |
-
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
22 |
return {"output": output}
|
23 |
|
24 |
@app.get("/")
|
25 |
async def get():
|
26 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from quart import Quart, request
|
2 |
+
from llama_cpp import Llama
|
3 |
|
4 |
app = Quart(__name__)
|
5 |
|
|
|
8 |
|
9 |
@app.post("/request")
|
10 |
async def echo():
|
11 |
+
try:
|
12 |
+
data = await request.get_json()
|
13 |
+
if data.get("max_tokens") != None and data.get("max_tokens") > 500: data['max_tokens'] = 500
|
14 |
+
userPrompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
|
15 |
+
except: return {"error": "Not enough data"}, 400
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
return {"output": output}
|
17 |
|
18 |
@app.get("/")
|
19 |
async def get():
|
20 |
+
return '''<h1>Hello, world!</h1>
|
21 |
+
This is showcase how to make own server with OpenBuddy's model.<br>
|
22 |
+
I'm using here 3b model just for example. Also here's only CPU power.<br>
|
23 |
+
But you can use GPU power as well!<br>
|
24 |
+
<br>
|
25 |
+
<h1>How to GPU?</h1>
|
26 |
+
|
27 |
+
'''
|