Spaces:
Paused
Paused
Daniel Marques
committed on
Commit
·
9f4aea3
1
Parent(s):
1c287d4
feat: update model
Browse files
- load_models.py +3 -3
- main.py +11 -9
load_models.py
CHANGED
@@ -66,10 +66,10 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
|
|
66 |
kwargs["n_gpu_layers"] = N_GPU_LAYERS
|
67 |
kwargs["n_batch"] = MAX_NEW_TOKENS # set this based on your GPU
|
68 |
|
69 |
-
kwargs["stream"] = stream
|
70 |
|
71 |
-
if stream == True:
|
72 |
-
kwargs["callbacks"] = callbacks
|
73 |
|
74 |
return LlamaCpp(**kwargs)
|
75 |
except:
|
|
|
66 |
kwargs["n_gpu_layers"] = N_GPU_LAYERS
|
67 |
kwargs["n_batch"] = MAX_NEW_TOKENS # set this based on your GPU
|
68 |
|
69 |
+
# kwargs["stream"] = stream
|
70 |
|
71 |
+
# if stream == True:
|
72 |
+
# kwargs["callbacks"] = callbacks
|
73 |
|
74 |
return LlamaCpp(**kwargs)
|
75 |
except:
|
main.py
CHANGED
@@ -29,12 +29,14 @@ class Predict(BaseModel):
|
|
29 |
class Delete(BaseModel):
|
30 |
filename: str
|
31 |
|
32 |
-
if torch.backends.mps.is_available():
|
33 |
-
DEVICE_TYPE = "mps"
|
34 |
-
elif torch.cuda.is_available():
|
35 |
-
DEVICE_TYPE = "cuda"
|
36 |
-
else:
|
37 |
-
DEVICE_TYPE = "cpu"
|
|
|
|
|
38 |
|
39 |
EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
|
40 |
DB = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=EMBEDDINGS, client_settings=CHROMA_SETTINGS)
|
@@ -97,9 +99,9 @@ def run_ingest_route():
|
|
97 |
|
98 |
run_langest_commands = ["python", "ingest.py"]
|
99 |
|
100 |
-
if DEVICE_TYPE == "cpu":
|
101 |
-
run_langest_commands.append("--device_type")
|
102 |
-
run_langest_commands.append(DEVICE_TYPE)
|
103 |
|
104 |
result = subprocess.run(run_langest_commands, capture_output=True)
|
105 |
|
|
|
29 |
class Delete(BaseModel):
|
30 |
filename: str
|
31 |
|
32 |
+
# if torch.backends.mps.is_available():
|
33 |
+
# DEVICE_TYPE = "mps"
|
34 |
+
# elif torch.cuda.is_available():
|
35 |
+
# DEVICE_TYPE = "cuda"
|
36 |
+
# else:
|
37 |
+
# DEVICE_TYPE = "cpu"
|
38 |
+
|
39 |
+
DEVICE_TYPE = "cuda"
|
40 |
|
41 |
EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
|
42 |
DB = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=EMBEDDINGS, client_settings=CHROMA_SETTINGS)
|
|
|
99 |
|
100 |
run_langest_commands = ["python", "ingest.py"]
|
101 |
|
102 |
+
# if DEVICE_TYPE == "cpu":
|
103 |
+
# run_langest_commands.append("--device_type")
|
104 |
+
# run_langest_commands.append(DEVICE_TYPE)
|
105 |
|
106 |
result = subprocess.run(run_langest_commands, capture_output=True)
|
107 |
|