Files changed (1)
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr


n_gpu_layers = 40  # Adjust based on your model and your GPU VRAM pool.
n_batch = 512      # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
n_ctx = 2048       # Context window size in tokens.

# Echo generated tokens to stdout as they stream in.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
path = "Dorna-Llama3-8B-Instruct-GGUF"  # Should point at the local GGUF model file.

llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=0.2,
    max_tokens=200,
    top_p=1,
)

# Alpaca-style instruction template; the user's question fills the {} slot.
prompt = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.\n\n
### Instruction:\n\n{}\n\n\n### Response:\n\n\n"""


def generate_output(text):
    """Stream the model's answer, yielding the accumulated text after each chunk."""
    result = ""
    for s in llm.stream(prompt.format(text)):
        result += s
        yield result  # Yielding partial results lets Gradio render the stream live.


def clear():
    """Reset both the input and output textboxes."""
    return "", ""


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # Label "ورودی" = "Input"; placeholder "سوال خود را وارد کنید" = "Enter your question".
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)

    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")  # "Send"
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")  # "Clear"
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)  # "Output"
    submit_btn.click(fn=generate_output,
                     inputs=[inputs],
                     outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])


demo.launch(server_name="0.0.0.0", share=True)