import gradio as gr
import psutil
import subprocess
import time


def generate_response_by_api():
    # Placeholder: generation via a hosted API instead of the local binary (not implemented yet).
    FinalOutput = ""
    pass
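
# A minimal sketch of what an API-based generator could look like, assuming the
# huggingface_hub InferenceClient and an illustrative model id (neither is wired
# into this app, so the sketch stays commented out):
#
# from huggingface_hub import InferenceClient
#
# def generate_response_by_api(user_message):
#     client = InferenceClient("stabilityai/stablelm-2-zephyr-1_6b")
#     final_output = ""
#     # Stream tokens so the Gradio output updates as they arrive.
#     for token in client.text_generation(user_message, max_new_tokens=400, stream=True):
#         final_output += token
#         yield final_output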


def generate_response(user_message):
    # Stream a completion from the local llama.cpp binary.
    cmd = [
        "/app/llama.cpp/main",
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
    process_monitor = psutil.Process(process.pid)

    start_time = time.time()
    monitor_start_time = time.time()
    alltokens = ""
    token_buffer = ''
    tokencount = 0

    try:
        while True:
            char = process.stdout.read(1)
            if char == '' and process.poll() is not None:
                break
            if char != '':
                token_buffer += char
                # Emit on whitespace so the UI updates roughly once per word.
                if char == ' ' or char == '\n':
                    elapsed_time = time.time() - start_time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: {tokencount}]"
                    token_buffer = ''

            # Log the subprocess's resource usage roughly once a minute.
            if time.time() - monitor_start_time > 60:
                cpu_usage = process_monitor.cpu_percent()
                memory_usage = process_monitor.memory_info().rss
                print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2:.2f} MB")
                monitor_start_time = time.time()

        # Flush whatever is left in the buffer once the stream ends.
        if token_buffer:
            elapsed_time = time.time() - start_time
            alltokens += token_buffer
            yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: {round(tokencount / elapsed_time, 2)}]"

    finally:
        try:
            process.wait(timeout=60)
        except subprocess.TimeoutExpired:
            print("Process didn't complete within the timeout. Killing it.")
            process.kill()
            process.wait()

        # Check stderr before closing the pipes; reading a closed pipe raises ValueError.
        if process.returncode != 0:
            error_message = process.stderr.read()
            print(f"Error: {error_message}")

        process.stdout.close()
        process.stderr.close()


def custom_generate_response(cust_user_message, prompt_index, prompts_list):
    """
    Generates a custom response based on the user message, the selected prompt,
    and the provided list of prompts, including a custom ending specific to the prompt.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: The index of the custom prompt to use.
    - prompts_list: The list of prompts to use for generating the response.
    """
    prompt, ending = prompts_list[prompt_index]
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    yield from generate_response(cust_user_message)
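
# For example, CustomPrompts[0] turns the (illustrative) user message "a library system" into:
# "Write a Class Diagram based on the following text:\n\na library system\n\nClass Diagram:"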

CustomPrompts = [
    ("Write a Class Diagram based on the following text:", "Class Diagram:"),
    ("Write Pydot code based on the following text:", "Pydot Code:"),
    ("Describe in great detail how a standard happy scene in any movie would be planned, based on the following text:", "Scene Details:"),
    ("Explain a teardown of the product mentioned in the following text:", "Teardown Details:"),
    ("Explain the manufacturing of the product mentioned in the following text:", "Manufacturing Details:"),
    ("Explain the marketing considerations of the product mentioned in the following text:", "Considerations:"),
    ("Explain the target user considerations of the product mentioned in the following text:", "Target User Considerations:"),
    ("My problem to solve is", "- please break this down into 10 sub-problems that have to be solved:"),
]

BusinessPrompts = [
    ("Write an outline for a business plan for ", ""),
    ("Write an outline for an Executive Summary for ", "Executive Summary:"),
    ("Write an outline for a Company Description for ", "Company Description:"),
    ("Write an outline for a Market Analysis for ", "Market Analysis:"),
    ("Write an outline for a Marketing and Sales Strategy for ", "Marketing and Sales Strategy:"),
    ("Write an outline for Product Development for ", "Product Development:"),
    ("Write an outline for Operations and Management for ", "Operations and Management:"),
    ("Write an outline for Financial Projections for ", "Financial Projections:"),
]


with gr.Blocks() as iface:
    gr.HTML("Stabilityai's demo - https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr")

    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) llama.cpp Interface Test (inconsistent performance - roughly 100 tokens in 50 s right after this HF space is updated, 800+ s once it has been open for a while)",
        description="No prompt template used yet (essentially autocomplete), and no message history for now - enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )

    gr.HTML("Any standard way of thinking / repetitive idea / rule of thumb / piece of advice can be turned into a button (in a timeline?)")

    gr.HTML("Test for wrapping the generator (instead of buttons, tabs, and dropdowns?)")
    MainOutput = gr.TextArea(placeholder='Output will show here')
    CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')
with gr.Accordion("Random Ideas"): |
|
with gr.Group(): |
|
for index, (prompt, _) in enumerate(CustomPrompts): |
|
button = gr.Button(prompt) |
|
|
|
button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(CustomPrompts)], outputs=MainOutput) |
|
|
|
with gr.Accordion("General Product based", open=False): |
|
with gr.Group(): |
|
for index, (prompt, _) in enumerate(BusinessPrompts): |
|
button = gr.Button(prompt) |
|
|
|
button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(BusinessPrompts)], outputs=MainOutput) |
|
|
|
iface.queue().launch(server_name="0.0.0.0", share=True) |