kwabs22 committed
Commit 078637c
Parent(s): e3894fb

Testing built in prompt?
app.py CHANGED
@@ -1,32 +1,9 @@
 import gradio as gr
-#from llama_cpp import Llama
 import random
 import subprocess
 import time
 
-
-# Initialize model
-#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
-
-"""
-def generate_response(user_message):
-    encodeduserm = b"### Human: " + user_message.encode('utf-8') + b"\n### Assistant:"
-    tokens = llm.tokenize(encodeduserm)
-    output = b""
-    count = 0
-
-    for token in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.72, repeat_penalty=1.1):
-        text = llm.detokenize([token])
-        output += text
-        count += 1
-        if count >= 500 or (token == llm.token_eos()):
-            break
-    return output.decode()
-"""
-
-"""
-def generate_response(user_message):
-    print("Before request")
+def generate_response(user_message): #Figure Out the parameters later and find a way to get the ram usage
     cmd = [
         "/app/llama.cpp/main", # Path to the executable
         "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
@@ -34,12 +11,30 @@ def generate_response(user_message):
         "-n", "400",
         "-e"
     ]
-    result = subprocess.run(cmd, capture_output=True, text=True)
-    print("After response")
-    return result.stdout
-"""
 
-
+    # Start the subprocess
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+    start_time = time.time()
+    alllines = ""
+
+    # Yield each line of output as it becomes available
+    for line in process.stdout:
+        alllines += " " + line
+        elapsed_time = time.time() - start_time # Calculate elapsed time
+        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
+
+    # Wait for the subprocess to finish if it hasn't already
+    process.wait()
+
+    # Check for any errors
+    if process.returncode != 0:
+        error_message = process.stderr.read()
+        print(f"Error: {error_message}")
+
+def custom_generate_response(user_message, builtinprompt): #Figure Out the parameters later and find a way to get the ram usage
+    user_message = builtinprompt + '\n\n ' + user_message
+
     cmd = [
         "/app/llama.cpp/main", # Path to the executable
         "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
@@ -68,13 +63,27 @@ def generate_response(user_message):
         error_message = process.stderr.read()
         print(f"Error: {error_message}")
 
-
-
-
-
-
-
-
-
+
+CustomPrompts = [
+    "Class Diagram for:"
+    "Pydot code for:"
+]
+
+with gr.Blocks() as iface:
+    gr.Interface(
+        fn=generate_response,
+        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
+        outputs="text",
+        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
+        description="No Message History for now - Enter your message and get a response.",
+        flagging_dir="/usr/src/app/flagged",
+    )
+    gr.HTML()
+    MainOutput = gr.TextArea()
+    CustomButtonInput = gr.Input()
+    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
+    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
+    CustomButtonClassDiagram.click(custom_generate_response, inputs=[CustomButtonInput, CustomPrompts[0]], outputs=MainOutput)
+    CustomButtonPydotcode.click(custom_generate_response, inputs=[CustomButtonInput, CustomPrompts[1]], outputs=MainOutput)
 
 iface.launch(server_name="0.0.0.0") #share=True)
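For reference, a minimal standalone sketch of the same built-in-prompt idea, not the committed code: the names run_llama, CUSTOM_PROMPTS, demo, user_box and output_box, and the "-p" prompt flag are assumptions (the prompt argument line falls outside the hunks shown above), the executable and model paths are copied from the diff, and functools.partial is used here as one way to bind each canned prompt to its button so that only the user textbox is passed to Gradio as an input component.

import subprocess
import time
from functools import partial

import gradio as gr

# Paths copied from the diff above; they assume the same container layout as the Space.
LLAMA_MAIN = "/app/llama.cpp/main"
MODEL_PATH = "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf"

def run_llama(prompt):
    """Stream llama.cpp output line by line, tagging each update with the elapsed time."""
    # "-p" is assumed from llama.cpp's standard CLI; "-n" and "-e" mirror the diff.
    cmd = [LLAMA_MAIN, "-m", MODEL_PATH, "-p", prompt, "-n", "400", "-e"]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    start_time = time.time()
    collected = ""
    for line in process.stdout:
        collected += " " + line
        yield f"{collected} [Inference time: {time.time() - start_time:.2f} seconds]"
    process.wait()
    if process.returncode != 0:
        print(f"Error: {process.stderr.read()}")

def custom_generate_response(user_message, builtinprompt):
    """Prepend a built-in prompt to the user's message before running inference."""
    yield from run_llama(builtinprompt + "\n\n " + user_message)

# Canned prompts surfaced as buttons.
CUSTOM_PROMPTS = ["Class Diagram for:", "Pydot code for:"]

with gr.Blocks() as demo:
    user_box = gr.Textbox(lines=2, placeholder="Type your message here...")
    output_box = gr.TextArea()
    for canned in CUSTOM_PROMPTS:
        # partial() bakes the canned prompt in, so only the textbox is a Gradio input component.
        gr.Button(canned).click(
            partial(custom_generate_response, builtinprompt=canned),
            inputs=user_box,
            outputs=output_box,
        )

demo.launch(server_name="0.0.0.0")

Binding the prompt with partial keeps the click handler a plain generator, so Gradio can stream the subprocess output into the TextArea the same way generate_response does above.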