Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
from fastapi import FastAPI, HTTPException, Request
|
2 |
-
from fastapi import FastAPI, HTTPException, Request
|
3 |
from pydantic import BaseModel
|
4 |
import uvicorn
|
5 |
import requests
|
@@ -70,7 +69,8 @@ class ModelManager:
|
|
70 |
temp_filename = await self.save_model_to_temp_file(model_config)
|
71 |
start_time = time.time()
|
72 |
print(f"Cargando modelo desde {temp_filename}")
|
73 |
-
|
|
|
74 |
end_time = time.time()
|
75 |
load_duration = end_time - start_time
|
76 |
if load_duration > 0:
|
@@ -114,8 +114,8 @@ class ModelManager:
|
|
114 |
async with self.index_lock:
|
115 |
part_name = f"part_{part_index}"
|
116 |
print(f"Indexando parte {part_index}")
|
117 |
-
|
118 |
-
with open(
|
119 |
f.write(model_part.getvalue())
|
120 |
print(f"Parte {part_index} indexada y guardada")
|
121 |
|
|
|
1 |
from fastapi import FastAPI, HTTPException, Request
|
|
|
2 |
from pydantic import BaseModel
|
3 |
import uvicorn
|
4 |
import requests
|
|
|
69 |
temp_filename = await self.save_model_to_temp_file(model_config)
|
70 |
start_time = time.time()
|
71 |
print(f"Cargando modelo desde {temp_filename}")
|
72 |
+
# Asegúrate de usar el método correcto para cargar el modelo
|
73 |
+
llama = Llama.load(temp_filename)
|
74 |
end_time = time.time()
|
75 |
load_duration = end_time - start_time
|
76 |
if load_duration > 0:
|
|
|
114 |
async with self.index_lock:
|
115 |
part_name = f"part_{part_index}"
|
116 |
print(f"Indexando parte {part_index}")
|
117 |
+
temp_filename = f"/tmp/{part_name}.gguf"
|
118 |
+
with open(temp_filename, 'wb') as f:
|
119 |
f.write(model_part.getvalue())
|
120 |
print(f"Parte {part_index} indexada y guardada")
|
121 |
|