Update app.py
app.py
CHANGED
@@ -85,11 +85,8 @@ def respond(
 
     llm = Llama(
         model_path=f"./{model}",
-
-
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
+        n_gpu_layers=-1,
+        n_ctx=2048,
     )
     provider = LlamaCppPythonProvider(llm)
 
@@ -123,7 +120,7 @@ def respond(
     messages.add_message(assistant)
 
     stream = agent.get_chat_response(
-        message
+        message,
         llm_sampling_settings=settings,
         chat_history=messages,
         returns_streaming_generator=True,
@@ -145,9 +142,9 @@ PLACEHOLDER = """
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="", label="System message", rtl=False),
+        gr.Textbox(value="You are a helpful Persian assistant. Please answer questions in the asked language.", label="System message", rtl=False),
         #gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
         # gr.Slider(
         #     minimum=0.1,
         #     maximum=1.0,
@@ -175,7 +172,7 @@ demo = gr.ChatInterface(
             'dorna-llama3-8b-instruct.Q5_0.gguf',
             'dorna-llama3-8b-instruct.bf16.gguf',
         ],
-        value="dorna-llama3-8b-instruct.
+        value="dorna-llama3-8b-instruct.Q8_0.gguf",
         label="Model"
         ),
     ],
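Taken together, the model-loading hunks switch from a fixed 81-layer GPU offload with an 8192-token context to full offload (n_gpu_layers=-1) with a 2048-token context and drop n_batch, and the second hunk adds the comma that was missing after `message` in the `agent.get_chat_response` call. Below is a minimal sketch of how those updated pieces fit together, assuming the llama-cpp-python and llama-cpp-agent APIs already used in app.py; the function name, the `system_prompt` wiring, and the streaming accumulation loop are illustrative assumptions, not the file's exact contents.

```python
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent.providers import LlamaCppPythonProvider


def respond_sketch(message, messages, system_message, model):
    # `messages` stands in for the BasicChatHistory that respond() builds
    # from the Gradio history before reaching this point.
    llm = Llama(
        model_path=f"./{model}",
        n_gpu_layers=-1,  # offload every layer to the GPU (was a fixed 81)
        n_ctx=2048,       # smaller context window (was 8192)
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(provider, system_prompt=system_message)

    settings = provider.get_provider_default_settings()
    settings.stream = True

    stream = agent.get_chat_response(
        message,  # the comma added in this commit separates it from the keyword arguments
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
    )

    # gr.ChatInterface consumes a generator that yields the reply accumulated so far.
    outputs = ""
    for token in stream:
        outputs += token
        yield outputs
```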
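On the UI side, the remaining hunks give the System message Textbox a Persian-assistant default, set the Temperature slider default to 0.1, and preselect the Q8_0 quantisation as the model. Here is a hedged sketch of the resulting gr.ChatInterface configuration: only the inputs visible in the diff are reproduced, the component holding the model list is assumed to be a gr.Dropdown, and the choices shown are limited to the filenames that appear in the diff.

```python
import gradio as gr

demo = gr.ChatInterface(
    respond,  # the chat function defined in app.py (sketched above)
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful Persian assistant. "
                  "Please answer questions in the asked language.",
            label="System message",
            rtl=False,
        ),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1,
                  label="Temperature"),
        gr.Dropdown(
            choices=[
                # app.py lists more quantisations; only these appear in the diff
                'dorna-llama3-8b-instruct.Q5_0.gguf',
                'dorna-llama3-8b-instruct.Q8_0.gguf',
                'dorna-llama3-8b-instruct.bf16.gguf',
            ],
            value="dorna-llama3-8b-instruct.Q8_0.gguf",  # new default
            label="Model",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
```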