kwabs22 committed
Commit 078637c
1 Parent(s): e3894fb

Testing built in prompt?

Files changed (1)
  1. app.py +46 -37
app.py CHANGED
@@ -1,32 +1,9 @@
 import gradio as gr
-#from llama_cpp import Llama
 import random
 import subprocess
 import time
 
-
-# Initialize model
-#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
-
-"""
-def generate_response(user_message):
-    encodeduserm = b"### Human: " + user_message.encode('utf-8') + b"\n### Assistant:"
-    tokens = llm.tokenize(encodeduserm)
-    output = b""
-    count = 0
-
-    for token in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.72, repeat_penalty=1.1):
-        text = llm.detokenize([token])
-        output += text
-        count += 1
-        if count >= 500 or (token == llm.token_eos()):
-            break
-    return output.decode()
-"""
-
-"""
-def generate_response(user_message):
-    print("Before request")
+def generate_response(user_message): #Figure Out the parameters later and find a way to get the ram usage
     cmd = [
         "/app/llama.cpp/main", # Path to the executable
         "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
@@ -34,12 +11,30 @@ def generate_response(user_message):
         "-n", "400",
         "-e"
     ]
-    result = subprocess.run(cmd, capture_output=True, text=True)
-    print("After response")
-    return result.stdout
-"""
 
-def generate_response(user_message):
+    # Start the subprocess
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+    start_time = time.time()
+    alllines = ""
+
+    # Yield each line of output as it becomes available
+    for line in process.stdout:
+        alllines += " " + line
+        elapsed_time = time.time() - start_time  # Calculate elapsed time
+        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
+
+    # Wait for the subprocess to finish if it hasn't already
+    process.wait()
+
+    # Check for any errors
+    if process.returncode != 0:
+        error_message = process.stderr.read()
+        print(f"Error: {error_message}")
+
+def custom_generate_response(user_message, builtinprompt): #Figure Out the parameters later and find a way to get the ram usage
+    user_message = builtinprompt + '\n\n ' + user_message
+
     cmd = [
         "/app/llama.cpp/main", # Path to the executable
         "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
@@ -68,13 +63,27 @@ def generate_response(user_message):
         error_message = process.stderr.read()
         print(f"Error: {error_message}")
 
-iface = gr.Interface(
-    fn=generate_response,
-    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
-    outputs="text",
-    title="LLaMA Chat Interface",
-    description="Enter your message and get a response from the LLaMA model.",
-    flagging_dir="/usr/src/app/flagged",
-)
+
+CustomPrompts = [
+    "Class Diagram for:"
+    "Pydot code for:"
+]
+
+with gr.Blocks() as iface:
+    gr.Interface(
+        fn=generate_response,
+        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
+        outputs="text",
+        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
+        description="No Message History for now - Enter your message and get a response.",
+        flagging_dir="/usr/src/app/flagged",
+    )
+    gr.HTML()
+    MainOutput = gr.TextArea()
+    CustomButtonInput = gr.Input()
+    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
+    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
+    CustomButtonClassDiagram.click(custom_generate_response, inputs=[CustomButtonInput, CustomPrompts[0]], outputs=MainOutput)
+    CustomButtonPydotcode.click(custom_generate_response, inputs=[CustomButtonInput, CustomPrompts[1]], outputs=MainOutput)
 
 iface.launch(server_name="0.0.0.0") #share=True)
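
One thing this commit leaves open is how the two prompt buttons hand their built-in prompt text to custom_generate_response: the click handlers pass CustomPrompts[0] and CustomPrompts[1] directly in inputs, which Gradio expects to be components, and the CustomPrompts list is missing a comma between its two strings, so Python concatenates them into a single entry and CustomPrompts[1] raises an IndexError. The sketch below is not part of the commit; it assumes the custom_generate_response defined in the diff above and shows one possible wiring that uses gr.State to carry each fixed prompt and a gr.Textbox for the shared input box.

# Minimal sketch, not the committed code: wire each button's built-in prompt
# through gr.State so it reaches custom_generate_response as a Gradio input.
# Assumes custom_generate_response(user_message, builtinprompt) from app.py above.
import gradio as gr

CustomPrompts = [
    "Class Diagram for:",  # comma added so the list has two separate entries
    "Pydot code for:",
]

with gr.Blocks() as iface:
    MainOutput = gr.TextArea()
    CustomButtonInput = gr.Textbox(lines=2, placeholder="Type your message here...")
    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
    CustomButtonPydotcode = gr.Button(CustomPrompts[1])

    # gr.State holds the fixed prompt string so it can be passed as an event input
    CustomButtonClassDiagram.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[0])],
        outputs=MainOutput,
    )
    CustomButtonPydotcode.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[1])],
        outputs=MainOutput,
    )

iface.launch(server_name="0.0.0.0")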