import gradio as gr
import subprocess
import time


def generate_response(user_message):
    # TODO: tune the sampling parameters later and find a way to report RAM usage.
    cmd = [
        "/app/llama.cpp/main",  # Path to the llama.cpp executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",  # Maximum number of tokens to generate
        "-e",         # Process escape sequences (\n, \t, ...) in the prompt
    ]

    # Start the subprocess
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    start_time = time.time()
    alllines = ""

    # Yield the accumulated output as each line becomes available
    for line in process.stdout:
        alllines += " " + line
        elapsed_time = time.time() - start_time  # Calculate elapsed time
        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"

    # Wait for the subprocess to finish if it hasn't already
    process.wait()

    # Check for any errors
    if process.returncode != 0:
        error_message = process.stderr.read()
        print(f"Error: {error_message}")


CustomPrompts = [
    "Class Diagram for:",
    "Pydot code for:",
]


def make_custom_generate_response(builtin_prompt):
    """Return a generator wrapper that prepends a fixed prompt prefix.

    A factory is used so each button can bind its own prefix; the original
    version hard-coded CustomPrompts[0], so both buttons produced the same
    prompt.
    """
    def custom_generate_response(cust_user_message):
        yield from generate_response(builtin_prompt + "\n\n" + cust_user_message)
    return custom_generate_response


with gr.Blocks() as iface:
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6B) llama.cpp Interface Test",
        description="No message history for now - enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )

    with gr.Group():
        gr.HTML("Test for wrapping generator")
        MainOutput = gr.TextArea()
        CustomButtonInput = gr.TextArea()
        CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
        CustomButtonPydotcode = gr.Button(CustomPrompts[1])

        # Each button binds its own prompt prefix via the factory above.
        CustomButtonClassDiagram.click(
            make_custom_generate_response(CustomPrompts[0]),
            inputs=[CustomButtonInput],
            outputs=MainOutput,
        )
        CustomButtonPydotcode.click(
            make_custom_generate_response(CustomPrompts[1]),
            inputs=[CustomButtonInput],
            outputs=MainOutput,
        )

iface.queue().launch(server_name="0.0.0.0", share=True)