ffreemt committed
Commit 5803ad5 • 1 Parent(s): 5603dfa
Update LLM with REPO_ID
- .ruff.toml  +4 -0
- .stignore   +1 -0
- app.py      +14 -17
.ruff.toml
ADDED
@@ -0,0 +1,4 @@
+# Assume Python 3.10.
+target-version = "py310"
+# Decrease the maximum line length to 79 characters.
+line-length = 300
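The added .ruff.toml pins target-version = "py310" and sets line-length = 300 (the comment above it, copied from the ruff documentation example, still mentions 79 characters). As a minimal sketch, assuming the ruff CLI is installed separately (it is not part of this commit), the configuration can be exercised from Python; the call and its arguments are illustrative:

# Illustrative only: lint app.py; ruff auto-discovers .ruff.toml in the project root.
# Assumes `pip install ruff` has been run in the environment.
import subprocess

result = subprocess.run(
    ["ruff", "check", "app.py"],
    capture_output=True,
    text=True,
)
print(result.stdout or "no lint findings")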
.stignore
CHANGED
@@ -1,3 +1,4 @@
+models
 .git
 # Byte-compiled / optimized / DLL files
 __pycache__
app.py
CHANGED
@@ -6,9 +6,9 @@
 # gradio.load("models/WizardLM/WizardCoder-15B-V1.0").launch()
 
 import os
-from pathlib import Path
 import time
 from dataclasses import asdict, dataclass
+from pathlib import Path
 from types import SimpleNamespace
 
 import gradio as gr
@@ -193,7 +193,7 @@ MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
 
 # https://huggingface.co/TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
-MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"
+MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin" # 8.4G
 
 DESTINATION_FOLDER = "models"
 
@@ -204,8 +204,8 @@ if "WizardCoder" in MODEL_FILENAME:
 if "uncensored" in MODEL_FILENAME.lower():
     REPO_ID = "TheBloke/WizardLM-13B-V1.0-Uncensored-GGML"
 
+logger.info("start dl")
 download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
-
 logger.info("done dl")
 
 # if "mpt" in model_filename:
@@ -227,21 +227,18 @@ llm = AutoModelForCausalLM.from_pretrained(
 # """
 
 logger.debug(f"{os.cpu_count()=}")
+logger.info("load llm")
 
-
-
-
-
-
-
-
-
-
-
-# model_file=MODEL_FILENAME,
-# model_type="starcoder",
-# threads=os.cpu_count() // 2 # type: ignore
-# )
+_ = Path("models", MODEL_FILENAME).absolute().as_posix()
+LLM = AutoModelForCausalLM.from_pretrained(
+    # "TheBloke/WizardCoder-15B-1.0-GGML",
+    REPO_ID,
+    model_file=_,
+    model_type="starcoder",
+    threads=os.cpu_count() // 2, # type: ignore
+)
+
+logger.info("done load llm")
 
 cpu_count = os.cpu_count() // 2 # type: ignore
 logger.debug(f"{cpu_count=}")
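In app.py the download step is now bracketed by "start dl" / "done dl" log lines around download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME). download_quant appears to be a helper defined elsewhere in the app; its body is not part of this diff, so the following is only a hypothetical sketch of such a helper, assuming it wraps huggingface_hub.hf_hub_download:

# Hypothetical sketch of a download_quant-style helper; not the committed implementation.
# Assumes huggingface_hub is installed.
from huggingface_hub import hf_hub_download


def download_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    """Fetch one quantized GGML file from the Hub into a local folder and return its path."""
    return hf_hub_download(
        repo_id=repo_id,               # e.g. "TheBloke/WizardLM-13B-V1.0-Uncensored-GGML"
        filename=model_filename,       # e.g. "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"
        local_dir=destination_folder,  # e.g. "models", which .stignore now excludes from syncing
        local_dir_use_symlinks=False,  # keep a real copy under models/ instead of a cache symlink
    )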
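The replacement block builds an absolute path to the downloaded file and loads it with ctransformers' AutoModelForCausalLM.from_pretrained, passing the path as model_file, model_type="starcoder", and half the CPU count as threads. Below is a minimal usage sketch, assuming ctransformers is installed and the GGML file already sits under models/; note that the commit keeps model_type="starcoder", which matches the earlier WizardCoder file, while a LLaMA-based WizardLM GGML would normally be loaded with model_type="llama". The prompt and generation arguments are illustrative:

# Illustrative usage sketch with ctransformers; the path mirrors the committed code, the prompt is made up.
import os
from pathlib import Path

from ctransformers import AutoModelForCausalLM

MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"
model_file = Path("models", MODEL_FILENAME).absolute().as_posix()

llm = AutoModelForCausalLM.from_pretrained(
    model_file,                   # a local GGML file path also works as the first argument
    model_type="llama",           # "starcoder" in the commit; "llama" fits the WizardLM-13B file
    threads=os.cpu_count() // 2,  # same heuristic as the commit
)

# A ctransformers model is callable and returns the generated completion as a string.
print(llm("Write a haiku about quantized models.", max_new_tokens=64))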