Spaces:
Running
Running
imperialwool
committed on
Update gradio_app.py
Browse files- gradio_app.py +4 -4
gradio_app.py
CHANGED
@@ -6,15 +6,15 @@ import psutil
|
|
6 |
|
7 |
# Initing things
|
8 |
print("! INITING LLAMA MODEL !")
|
9 |
-
llm = Llama(model_path="./model.bin")
|
10 |
-
llama_model_name = "Vikhrmodels/Vikhr-
|
11 |
print("! INITING DONE !")
|
12 |
|
13 |
# Preparing things to work
|
14 |
title = "llama.cpp API"
|
15 |
desc = '''<h1>Hello, world!</h1>
|
16 |
-
This is showcase how to make own server with
|
17 |
-
I'm using here
|
18 |
But you can use GPU power as well!<br><br>
|
19 |
<h1>How to GPU?</h1>
|
20 |
Change <code>`CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS`</code> in Dockerfile on <code>`CMAKE_ARGS="-DLLAMA_CUBLAS=on"`</code>. Also you can try <code>`DLLAMA_CLBLAST`</code> or <code>`DLLAMA_METAL`</code>.<br><br>
|
|
|
6 |
|
7 |
# Initing things
|
8 |
print("! INITING LLAMA MODEL !")
|
9 |
+
llm = Llama(model_path="./model.bin") # LLaMa model
|
10 |
+
llama_model_name = "Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF" # This is just for indication in "three dots menu"
|
11 |
print("! INITING DONE !")
|
12 |
|
13 |
# Preparing things to work
|
14 |
title = "llama.cpp API"
|
15 |
desc = '''<h1>Hello, world!</h1>
|
16 |
+
This is showcase how to make own server with any Llama based model using llama_cpp.<br>
|
17 |
+
I'm using here 1.5b model just for example. Also here's only CPU power.<br>
|
18 |
But you can use GPU power as well!<br><br>
|
19 |
<h1>How to GPU?</h1>
|
20 |
Change <code>`CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS`</code> in Dockerfile on <code>`CMAKE_ARGS="-DLLAMA_CUBLAS=on"`</code>. Also you can try <code>`DLLAMA_CLBLAST`</code> or <code>`DLLAMA_METAL`</code>.<br><br>
|