toaster61 committed
Commit c43baac · 1 Parent(s): fb62a18
7b model
Files changed:
- Dockerfile +1 -1
- gradio_app.py +5 -4
Dockerfile
CHANGED
@@ -19,7 +19,7 @@ RUN mkdir translator
 RUN chmod -R 777 translator
 
 # Installing wget and downloading model.
-ADD https://huggingface.co/TheBloke/
+ADD https://huggingface.co/TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF/resolve/main/dolphin-2.2.1-ashhlimarp-mistral-7b.Q5_0.gguf /app/model.bin
 RUN chmod -R 777 /app/model.bin
 # You can use other models! Or u can comment this two RUNs and include in Space/repo/Docker image own model with name "model.bin".
 
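The comment above invites swapping in other models. As an alternative to baking the multi-gigabyte download into the image with ADD, the file could also be fetched at startup. A minimal Python sketch using huggingface_hub; the helper name and the runtime-download approach are illustrative, not part of this commit:

# Sketch (illustrative, not from this commit): fetch the GGUF at container
# start instead of baking it into the image with ADD.
# Assumes the huggingface_hub package is installed.
import shutil

from huggingface_hub import hf_hub_download

def fetch_model(repo_id: str, filename: str, dest: str = "/app/model.bin") -> str:
    # Download into the local HF cache (or reuse it), then copy the file
    # to the fixed path the app expects ("model.bin").
    cached = hf_hub_download(repo_id=repo_id, filename=filename)
    shutil.copy(cached, dest)
    return dest

fetch_model(
    "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF",
    "dolphin-2.2.1-ashhlimarp-mistral-7b.Q5_0.gguf",
)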
gradio_app.py
CHANGED
@@ -17,7 +17,7 @@ print("! SETTING MODEL IN EVALUATION MODE !")
 translator_model.eval()
 print("! INITING LLAMA MODEL !")
 llm = Llama(model_path="./model.bin") # LLaMa model
-llama_model_name = "TheBloke/
+llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
 print("! INITING DONE !")
 
 # Preparing things to work
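Note that llm = Llama(model_path="./model.bin") relies on library defaults for everything else. A hedged sketch of the same initialization with the knobs most relevant to a CPU-only Space; the extra parameters and their values are illustrative, not from this commit:

# Sketch: the same llama-cpp-python initialization with commonly tuned
# options spelled out. Only model_path appears in the commit; the values
# below are illustrative assumptions.
from llama_cpp import Llama

llm = Llama(
    model_path="./model.bin",
    n_ctx=2048,     # context window, in tokens
    n_threads=4,    # CPU threads; this Space runs on CPU only
)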
@@ -25,7 +25,7 @@ translator_tokenizer.src_lang = "en"
 title = "llama.cpp API"
 desc = '''<h1>Hello, world!</h1>
 This is showcase how to make own server with Llama2 model.<br>
-I'm using here
+I'm using here 7b model just for example. Also here's only CPU power.<br>
 But you can use GPU power as well!<br><br>
 <h1>How to GPU?</h1>
 Change <code>`CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS`</code> in Dockerfile on <code>`CMAKE_ARGS="-DLLAMA_CUBLAS=on"`</code>. Also you can try <code>`DLLAMA_CLBLAST`</code> or <code>`DLLAMA_METAL`</code>.<br><br>
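One detail the description leaves implicit: rebuilding llama-cpp-python with -DLLAMA_CUBLAS=on only enables CUDA support; layers still have to be offloaded when the model is loaded. A sketch, with an arbitrary example value:

# Sketch: once llama-cpp-python is compiled with -DLLAMA_CUBLAS=on,
# GPU offload is requested per model via n_gpu_layers.
# 35 is an arbitrary illustrative value, not from this commit.
from llama_cpp import Llama

llm = Llama(model_path="./model.bin", n_gpu_layers=35)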
@@ -69,7 +69,7 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
     try:
         # this shitty fix will be until i willnt figure out why sometimes there is empty output
         counter = 1
-        while
+        while counter <= 10:
             logs += f"Attempt {counter} to generate answer...\n"
             output = llm(userPrompt, max_tokens=maxTokens, stop=["User:"], echo=False)
             text = output["choices"][0]["text"]
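For context on the indexing in the last line: calling a llama-cpp-python model returns an OpenAI-style completion dict. A sketch with illustrative field values:

# Sketch: shape of the dict returned by calling the model directly.
# Field values are illustrative; only the keys used above are shown.
output = llm("User: hi\nAssistant:", max_tokens=32, stop=["User:"], echo=False)
# output == {
#     "choices": [
#         {"text": " Hello! How can I help?", "finish_reason": "stop"},
#     ],
# }
text = output["choices"][0]["text"]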
@@ -77,7 +77,8 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
                 break
             counter += 1
         logs += f"Final attempt: {counter}\n"
-
+        if len(text.strip()) > 1 and text.strip() not in ['', None, ' ']:
+            text = "Sorry, but something went wrong while generating answer. Try again or fix code. If you are maintainer of this space, look into logs."
 
         if language in languages and language != "en":
             logs += f"\nTranslating from en to {language}"
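Taken together, the added lines retry generation up to ten times and then substitute an apology string. A standalone sketch of that retry-and-fallback pattern, written on the assumption that the fallback is meant to fire when the completion comes back empty (the committed guard tests the opposite condition):

# Sketch of the retry-and-fallback pattern this commit adds, assuming the
# fallback message should replace an EMPTY completion.
def generate_with_retry(prompt: str, max_tokens: int = 256, attempts: int = 10) -> str:
    text = ""
    for _ in range(attempts):
        output = llm(prompt, max_tokens=max_tokens, stop=["User:"], echo=False)
        text = output["choices"][0]["text"]
        if len(text.strip()) > 1:
            break  # got a non-trivial answer, stop retrying
    if len(text.strip()) <= 1:
        text = "Sorry, but something went wrong while generating answer."
    return text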