|
import os |
|
import requests |
|
from flask import Flask, render_template, request |
|
from llama_cpp import Llama |
|
from dotenv import load_dotenv |
|
# Load environment variables from a local .env file (e.g. PORT, MODEL_PATH).
load_dotenv()

# Model path is now configurable via the MODEL_PATH env var; the default is
# the original hard-coded location, so existing deployments keep working.
llm = Llama(
    model_path=os.environ.get('MODEL_PATH', './model.gguf'),
    n_ctx=2048,  # context window size, in tokens
)

# Templates are served from the repository root (index.html sits next to
# this file rather than in a templates/ directory).
app = Flask(__name__, template_folder='./')
|
|
|
@app.route("/")
def home():
    """Serve the single-page UI rendered from index.html."""
    page = render_template("index.html")
    return page
|
|
|
@app.route("/respond")
def respond():
    """Generate a completion for the prompt given in the query string.

    Query parameters:
        input:  the prompt text (required).
        maxTok: maximum number of tokens to generate (optional integer;
                defaults to 64).

    Returns the generated text (prompt echoed back plus the completion),
    or an HTTP 400 error when parameters are missing or invalid.
    """
    # Don't shadow the builtin ``input``; validate explicitly instead of
    # str()-wrapping, which previously turned a missing parameter into
    # the literal prompt string "None".
    prompt = request.args.get('input')
    if prompt is None:
        return "Missing required parameter: input", 400

    # int(str(None)) previously raised ValueError -> HTTP 500 when maxTok
    # was absent or non-numeric; fail gracefully with a 400 instead.
    try:
        max_tokens = int(request.args.get('maxTok', 64))
    except ValueError:
        return "Parameter maxTok must be an integer", 400

    # echo=True makes llama-cpp return the prompt followed by the
    # completion, matching the original behavior. "<|im_end|>" is the
    # ChatML end-of-turn marker used as a stop sequence.
    output = llm(prompt, max_tokens=max_tokens, stop=["<|im_end|>"], echo=True)
    return output['choices'][0]['text']
|
|
|
if __name__ == '__main__':
    # Bind on all interfaces; the port comes from the PORT environment
    # variable, falling back to 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)