from llama_cpp import Llama
import gradio as gr
import os
import json

# Environment-driven configuration, with defaults for a small CPU-only Space.
GRADIO_SHOW_API_INFO = (os.getenv("AGS_SHOW_API_INFO") or "true").lower() not in ("0", "false", "no")
AGS_REPO = os.getenv("AGS_REPO") or "lmstudio-community/gemma-1.1-2b-it-GGUF"
AGS_FILENAME = os.getenv("AGS_FILENAME") or "gemma-1.1-2b-it-Q4_K_M.gguf"
AGS_TITLE = os.getenv("AGS_TITLE") or "API GGUF Space"

# Default llama.cpp options; the AGS_LLAMA_CONFIG env var (a JSON object) replaces them when set.
AGS_LLAMA_CONFIG = {
    "n_gpu_layers": 0,  # CPU-only by default
}
_raw_llama_config = os.getenv("AGS_LLAMA_CONFIG")
if _raw_llama_config:
    try:
        AGS_LLAMA_CONFIG = json.loads(_raw_llama_config)
    except Exception:
        print("Invalid Llama config. Config must be valid JSON. Got:\n", _raw_llama_config)


def main():
    # Download the GGUF file from the Hub (if needed) and load it with llama.cpp.
    llm = Llama.from_pretrained(AGS_REPO, filename=AGS_FILENAME, **AGS_LLAMA_CONFIG)

    def api_chat(inpt, settings):
        """API endpoint: `inpt` and `settings` arrive as JSON strings.

        Recognised request shapes:
          * plain text (not JSON)           -> raw completion
          * {"@execute": true, ...}         -> raw completion over the serialized object
          * {"messages": [...]}             -> chat completion
          * settings {"prompt": true}       -> force raw completion of `inpt`
          * settings {"full_output": true}  -> return the full response object
        """
        res = None

        # Parse the settings first; fall back to defaults if they are not valid JSON.
        try:
            settings = json.loads(settings)
        except Exception:
            settings = {}
        if not isinstance(settings, dict):
            settings = {}
        # Pull control flags out so only sampling kwargs are forwarded to llama.cpp.
        full_output = settings.pop("full_output", False)
        force_prompt = settings.pop("prompt", False)

        # Parse the input; if it is not JSON, treat it as a raw prompt.
        try:
            inpt = json.loads(inpt)
            print("Request:\n" + json.dumps(inpt, indent=2) + "\n\n" + ("*_" * 24))
        except Exception:
            res = llm(inpt, **settings)

        if res is None:
            if isinstance(inpt, dict) and inpt.get("@execute"):
                # Run a raw completion over the serialized request object itself.
                inpt.pop("@execute", None)
                res = llm(json.dumps(inpt), **settings)
            elif isinstance(inpt, dict) and inpt.get("messages"):
                # Chat-style request: forward the message list through the chat template.
                res = llm.create_chat_completion(messages=inpt["messages"], **settings)
            elif force_prompt and isinstance(inpt, str):
                # Caller asked for the input to be treated as a raw prompt.
                res = llm(inpt, **settings)
            else:
                # Fallback: complete over the serialized input.
                res = llm(json.dumps(inpt), **settings)

        if full_output:
            return res
        # Otherwise extract just the generated text from the response.
        choice = res["choices"][0]
        if "content" in choice:
            return choice["content"]
        if "text" in choice:
            return choice["text"]
        if "message" in choice and "content" in choice["message"]:
            return choice["message"]["content"]
        return res

    def chat(inpt):
        # Simple UI handler: a single user turn through the model's chat template.
        if not inpt:
            return ""
        return llm.create_chat_completion(
            messages=[{"role": "user", "content": inpt}]
        )["choices"][0]["message"]["content"]

    inpt = gr.Textbox()
    with gr.Interface(fn=chat, inputs=[inpt], outputs="text", title=AGS_TITLE) as interface:
        # Hidden components expose `api_chat` as a named API endpoint without cluttering the UI.
        with gr.Row(visible=False):
            shadow_input = gr.Textbox(visible=False)
            shadow_output = gr.Textbox(visible=False)
            shadow_input.submit(
                api_chat,
                inputs=[inpt, shadow_input],
                outputs=[shadow_output],
                api_name="api_chat",
            )
    interface.launch(debug=True, show_api=GRADIO_SHOW_API_INFO)


if __name__ == "__main__":
    main()
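
# --- Usage sketch (comments only, not executed) -----------------------------
# A minimal example of calling the hidden `api_chat` endpoint with
# `gradio_client`, assuming this script is running as a deployed Space.
# The Space id below is a placeholder; the settings keys are ordinary
# llama.cpp sampling parameters plus the control flags handled above.
#
#   from gradio_client import Client
#   import json
#
#   client = Client("your-username/your-space")  # placeholder Space id
#   result = client.predict(
#       json.dumps({"messages": [{"role": "user", "content": "Hello!"}]}),  # inpt
#       json.dumps({"max_tokens": 128, "full_output": False}),              # settings
#       api_name="/api_chat",
#   )
#   print(result)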