|
import gradio as gr |
|
import random |
|
import subprocess |
|
import time |
|
|
|
def generate_response(user_message):
    """Stream a llama.cpp completion for *user_message*.

    Runs the llama.cpp CLI as a subprocess and yields the accumulated
    transcript after each new output line, suffixed with the elapsed
    inference time so the UI can show streaming progress.

    Args:
        user_message: Prompt text passed to the model via ``-p``.

    Yields:
        str: All output received so far plus an inference-time note.
             On a nonzero exit code, a final string carrying the
             subprocess's stderr is yielded as well.
    """
    cmd = [
        "/app/llama.cpp/main",  # llama.cpp CLI binary
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",  # cap generated tokens
        "-e",         # process escape sequences in the prompt
    ]

    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    start_time = time.time()
    alllines = ""

    try:
        # Yield the growing transcript as each stdout line arrives.
        for line in process.stdout:
            alllines += " " + line
            elapsed_time = time.time() - start_time
            yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"

        process.wait()

        if process.returncode != 0:
            error_message = process.stderr.read()
            print(f"Error: {error_message}")
            # Surface the failure to the caller instead of only logging
            # it on the server console.
            yield f"{alllines}\n[Error: {error_message.strip()}]"
    finally:
        # Close pipes and reap the child even if the consumer abandons
        # the generator mid-stream (previously both pipes leaked).
        process.stdout.close()
        process.stderr.close()
        if process.poll() is None:
            process.kill()
        process.wait()
|
|
|
|
|
def custom_generate_response(cust_user_message, builtin_prompt=None):
    """Prefix the user message with a built-in prompt and stream the reply.

    Args:
        cust_user_message: Raw text entered by the user.
        builtin_prompt: Prompt prefix to prepend before generation.
            Defaults to ``CustomPrompts[0]`` ("Class Diagram for:") to
            stay backward compatible with the existing button wiring,
            which passes only the user message.

    Yields:
        str: Incremental output from ``generate_response``.
    """
    if builtin_prompt is None:
        builtin_prompt = CustomPrompts[0]
    cust_user_message = builtin_prompt + '\n\n' + cust_user_message
    yield from generate_response(cust_user_message)
|
|
|
""" |
|
def custom_generate_response(user_message, builtinprompt): #Figure Out the parameters later and find a way to get the ram usage |
|
user_message = builtinprompt + '\n\n ' + user_message |
|
|
|
cmd = [ |
|
"/app/llama.cpp/main", # Path to the executable |
|
"-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf", |
|
"-p", user_message, |
|
"-n", "400", |
|
"-e" |
|
] |
|
|
|
# Start the subprocess |
|
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) |
|
|
|
start_time = time.time() |
|
alllines = "" |
|
|
|
# Yield each line of output as it becomes available |
|
for line in process.stdout: |
|
alllines += " " + line |
|
elapsed_time = time.time() - start_time # Calculate elapsed time |
|
yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]" |
|
|
|
# Wait for the subprocess to finish if it hasn't already |
|
process.wait() |
|
|
|
# Check for any errors |
|
if process.returncode != 0: |
|
error_message = process.stderr.read() |
|
print(f"Error: {error_message}") |
|
""" |
|
|
|
CustomPrompts = [ |
|
"Class Diagram for:", |
|
"Pydot code for:", |
|
] |
|
|
|
# --- Gradio UI ----------------------------------------------------------
with gr.Blocks() as iface:
    # Simple interface: one textbox in, streamed text out, backed by the
    # generate_response generator above.
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
        description="No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )

    # Second panel: buttons labelled with the canned prompts; each sends
    # the shared input textarea through custom_generate_response.
    with gr.Group():
        gr.HTML("Test for wrapping generator")
        MainOutput = gr.TextArea()
        CustomButtonInput = gr.TextArea()
        CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
        CustomButtonPydotcode = gr.Button(CustomPrompts[1])
        # NOTE(review): both buttons invoke the same handler, which always
        # applies CustomPrompts[0] — the Pydot button presumably should use
        # CustomPrompts[1]; confirm intended behavior.
        CustomButtonClassDiagram .click(custom_generate_response, inputs=[CustomButtonInput], outputs=MainOutput)
        CustomButtonPydotcode.click(custom_generate_response, inputs=[CustomButtonInput], outputs=MainOutput)

# queue() enables streaming of generator outputs; share=True requests a
# public Gradio URL, and 0.0.0.0 binds all interfaces (container use).
iface.queue().launch(server_name="0.0.0.0", share=True)