kwabs22
Flexible function for different prompts to ensure group categories
3267959
raw
history blame
8.23 kB
import gradio as gr
import psutil
import subprocess
import time
def generate_response_by_api():
    """Placeholder for a future API-based generation path.

    Not implemented yet: currently a no-op stub that returns None.
    """
    final_output = ""  # reserved for the assembled response once implemented
    #return final_output
    pass
def generate_response(user_message):  # generate_response_token_by_token
    """Stream a llama.cpp completion for *user_message*, yielding text so far.

    Spawns the llama.cpp binary as a subprocess and reads its stdout one
    character at a time.  Each time a token delimiter (space or newline) is
    seen, yields the accumulated output annotated with elapsed time and a
    token count; the final yield reports average tokens per second.

    Parameters:
    - user_message: The raw prompt text passed to llama.cpp via ``-p``.

    Yields:
        str: all generated text so far plus timing statistics.
    """
    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
    process_monitor = psutil.Process(process.pid)
    start_time = time.time()
    monitor_start_time = time.time()
    alltokens = ""
    token_buffer = ''
    tokencount = 0
    try:
        while True:
            # Read one character at a time so output can be streamed promptly
            char = process.stdout.read(1)
            if char == '' and process.poll() is not None:
                break
            if char != '':
                token_buffer += char
                if char == ' ' or char == '\n':  # Token delimiters
                    elapsed_time = time.time() - start_time  # Calculate elapsed time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
                    token_buffer = ''  # Reset token buffer
            # Log resource usage every minute
            if time.time() - monitor_start_time > 60:
                cpu_usage = process_monitor.cpu_percent()
                memory_usage = process_monitor.memory_info().rss  # in bytes
                print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
                monitor_start_time = time.time()  # Reset the timer
        # Yield the last token if there is any
        if token_buffer:
            elapsed_time = time.time() - start_time  # Calculate elapsed time
            alltokens += token_buffer
            # Guard against ZeroDivisionError for near-instant completions
            tokens_per_second = round(tokencount / elapsed_time, 2) if elapsed_time > 0 else 0.0
            yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { tokens_per_second }]"
    finally:
        try:
            # Wait for the process to complete, with a timeout
            process.wait(timeout=60)  # Timeout in seconds
        except subprocess.TimeoutExpired:
            print("Process didn't complete within the timeout. Killing it.")
            process.kill()
            process.wait()  # Ensure proper cleanup
        # Check for errors BEFORE closing the pipes.  The original code closed
        # stderr first and then called stderr.read(), which raises
        # "ValueError: I/O operation on closed file" on any non-zero exit.
        if process.returncode != 0:
            error_message = process.stderr.read()
            print(f"Error: {error_message}")
        process.stdout.close()
        process.stderr.close()
# def custom_generate_response(cust_user_message, prompt_index):
# """
# Generates a custom response based on the user message and the selected prompt,
# including a custom ending specific to the prompt.
# Parameters:
# - cust_user_message: The message input from the user.
# - prompt_index: The index of the custom prompt to use.
# """
# prompt, ending = CustomPrompts[prompt_index] # Unpack the prompt and its ending
# cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
# yield from generate_response(cust_user_message)
def custom_generate_response(cust_user_message, prompt_index, prompts_list):
    """Wrap the user's message in a selected prompt template and stream a reply.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: Index into *prompts_list* selecting which template to use.
    - prompts_list: List of (prompt, ending) pairs; the prompt is prepended
      and the ending appended around the user's message.
    """
    # Look up the chosen template and assemble the final prompt text
    prompt, ending = prompts_list[prompt_index]
    templated_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    # Delegate token streaming to the shared generator
    yield from generate_response(templated_message)
# General-purpose prompt templates as (prompt, ending) pairs: the prompt is
# prepended before the user's message and the ending appended after it
# (see custom_generate_response).  Each entry becomes one button in the
# "Random Ideas" accordion of the UI.
CustomPrompts = [
    ("Write a Class Diagram based on the following text:", "Class Diagram:"),
    ("Write a Pydot code based on the following text:", "Pydot Code:"),
    ("Describe what a standard happy scene in any movie would be planned in great detail, based on the following text:", "Scene Details"),
    ("Explain a teardown of the product mentioned in the following text:", "Teardown Details:"),
    ("Explain the manufacturing of the product mentioned in the following text:", "Manufacturing Details:"),
    ("Explain the marketing considerations of the product mentioned in the following text:", "Considerations:"),
    ("Explain the target users considerations of the product mentioned in the following text:", "Target Users Considerations:"),
    ("My problem to solve is", "- please make 10 sub problems have to solve from this:"),
]
# Business-plan-outline prompt templates as (prompt, ending) pairs, consumed
# the same way as CustomPrompts.  Each entry becomes one button in the
# "General Product based" accordion of the UI.
BusinessPrompts = [
    ("Write an outline for a business plan for " , ""),
    ("Write an outline for a Executive Summary for " , "Executive Summary:"),
    ("Write an outline for a Company Description for " , "Company Description:"),
    ("Write an outline for a Market Analysis for " , "Market Analysis:"),
    ("Write an outline for a Marketing and Sales Strategy for " , "Marketing and Sales Strategy:"),
    ("Write an outline for a Product Development for " , "Product Development:"),
    ("Write an outline for a Operations and Management for " , "Operations and Management:"),
    ("Write an outline for a Financial Projections for " , "Financial Projections:"),
]
# Gradio application layout: a plain completion interface plus one button per
# prompt template.  Clicking a template button routes the shared textbox
# through custom_generate_response with the matching prompt list and index.
with gr.Blocks() as iface:
    gr.HTML("Stabilityai's demo - https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr")
    # Raw (no-template) completion interface backed directly by llama.cpp
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test (Inconsistent Performance - 100 tokens in 50 secs (when this HF space is updated) or 800+ secs(HF space open for long))",
        description="No Prompt template used yet (Essentially autocomplete). No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    gr.HTML("Any standard way of thinking / Repetitive idea / rule of thumb / advice can be turned into a button (In a timeline?)")
    gr.HTML("Test for wrapping generator (Instead of buttons tabs and dropdowns?)")
    # Shared output area and input textbox reused by every template button below
    MainOutput = gr.TextArea(placeholder='Output will show here')
    CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')
    # Earlier version that relied on the module-level CustomPrompts/BusinessPrompts
    # lists inside the handler instead of passing them as gr.State:
    # with gr.Accordion("Random Ideas"):
    #     with gr.Group():
    #         # Dynamically create buttons and assign actions
    #         for index, (prompt, _) in enumerate(CustomPrompts):
    #             button = gr.Button(prompt)
    #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index)], outputs=MainOutput)
    # with gr.Accordion("General Product based", open=False):
    #     with gr.Group():
    #         # Dynamically create buttons and assign actions
    #         for index, (prompt, _) in enumerate(BusinessPrompts):
    #             button = gr.Button(prompt)
    #             button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index)], outputs=MainOutput)
    with gr.Accordion("Random Ideas"):
        with gr.Group():
            # One button per template; gr.State pins the index and list at
            # button-creation time (avoids the late-binding closure pitfall)
            for index, (prompt, _) in enumerate(CustomPrompts):
                button = gr.Button(prompt)
                # Pass CustomPrompts list as an argument
                button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(CustomPrompts)], outputs=MainOutput)
    with gr.Accordion("General Product based", open=False):
        with gr.Group():
            for index, (prompt, _) in enumerate(BusinessPrompts):
                button = gr.Button(prompt)
                # Pass BusinessPrompts list as an argument
                button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(BusinessPrompts)], outputs=MainOutput)
# queue() enables streaming/generator handlers; 0.0.0.0 exposes the server
# inside the container
iface.queue().launch(server_name="0.0.0.0", share=True)