Daniel Marques committed on
Commit
9f4aea3
·
1 Parent(s): 1c287d4

feat: update model

Browse files
Files changed (2) hide show
  1. load_models.py +3 -3
  2. main.py +11 -9
load_models.py CHANGED
@@ -66,10 +66,10 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
66
  kwargs["n_gpu_layers"] = N_GPU_LAYERS
67
  kwargs["n_batch"] = MAX_NEW_TOKENS # set this based on your GPU
68
 
69
- kwargs["stream"] = stream
70
 
71
- if stream == True:
72
- kwargs["callbacks"] = callbacks
73
 
74
  return LlamaCpp(**kwargs)
75
  except:
 
66
  kwargs["n_gpu_layers"] = N_GPU_LAYERS
67
  kwargs["n_batch"] = MAX_NEW_TOKENS # set this based on your GPU
68
 
69
+ # kwargs["stream"] = stream
70
 
71
+ # if stream == True:
72
+ # kwargs["callbacks"] = callbacks
73
 
74
  return LlamaCpp(**kwargs)
75
  except:
main.py CHANGED
@@ -29,12 +29,14 @@ class Predict(BaseModel):
29
  class Delete(BaseModel):
30
  filename: str
31
 
32
- if torch.backends.mps.is_available():
33
- DEVICE_TYPE = "mps"
34
- elif torch.cuda.is_available():
35
- DEVICE_TYPE = "cuda"
36
- else:
37
- DEVICE_TYPE = "cpu"
 
 
38
 
39
  EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
40
  DB = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=EMBEDDINGS, client_settings=CHROMA_SETTINGS)
@@ -97,9 +99,9 @@ def run_ingest_route():
97
 
98
  run_langest_commands = ["python", "ingest.py"]
99
 
100
- if DEVICE_TYPE == "cpu":
101
- run_langest_commands.append("--device_type")
102
- run_langest_commands.append(DEVICE_TYPE)
103
 
104
  result = subprocess.run(run_langest_commands, capture_output=True)
105
 
 
29
  class Delete(BaseModel):
30
  filename: str
31
 
32
+ # if torch.backends.mps.is_available():
33
+ # DEVICE_TYPE = "mps"
34
+ # elif torch.cuda.is_available():
35
+ # DEVICE_TYPE = "cuda"
36
+ # else:
37
+ # DEVICE_TYPE = "cpu"
38
+
39
+ DEVICE_TYPE = "cuda"
40
 
41
  EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
42
  DB = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=EMBEDDINGS, client_settings=CHROMA_SETTINGS)
 
99
 
100
  run_langest_commands = ["python", "ingest.py"]
101
 
102
+ # if DEVICE_TYPE == "cpu":
103
+ # run_langest_commands.append("--device_type")
104
+ # run_langest_commands.append(DEVICE_TYPE)
105
 
106
  result = subprocess.run(run_langest_commands, capture_output=True)
107