dfdfdfd

Sleeping

Yjhhh committed on Sep 4

Commit

1ff2b2e

•

1 Parent(s): 3e1e0dc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,12 +2,11 @@ from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 import uvicorn
 import requests
-import asyncio
 import os
 import io
 import time
 from typing import List, Dict, Any
-from llama_cpp import Llama  # Ajusta según la biblioteca que estés utilizando
 from tqdm import tqdm
 app = FastAPI()
@@ -69,7 +68,7 @@ class ModelManager:
                 temp_filename = await self.save_model_to_temp_file(model_config)
                 start_time = time.time()
                 print(f"Cargando modelo desde {temp_filename}")
-                llama = Llama(temp_filename)  # Ajusta según la biblioteca y clase correctas
                 end_time = time.time()
                 load_duration = end_time - start_time
                 if load_duration > 0:
@@ -113,9 +112,10 @@ class ModelManager:
         async with self.index_lock:
             part_name = f"part_{part_index}"
             print(f"Indexando parte {part_index}")
-            llama_part = Llama(model_part)
-            self.model_parts[part_name] = llama_part
-            print(f"Parte {part_index} indexada")
     async def generate_response(self, user_input):
         results = []

 from pydantic import BaseModel
 import uvicorn
 import requests
 import os
 import io
 import time
 from typing import List, Dict, Any
+from llama_cpp import Llama  # Asegúrate de ajustar esto según la biblioteca que utilices
 from tqdm import tqdm
 app = FastAPI()
                 temp_filename = await self.save_model_to_temp_file(model_config)
                 start_time = time.time()
                 print(f"Cargando modelo desde {temp_filename}")
+                llama = Llama.load(temp_filename)  # Usa el método adecuado para cargar el modelo
                 end_time = time.time()
                 load_duration = end_time - start_time
                 if load_duration > 0:
         async with self.index_lock:
             part_name = f"part_{part_index}"
             print(f"Indexando parte {part_index}")
+            # Usar un nombre de archivo temporal para cada parte del modelo
+            with open(f"/tmp/{part_name}.gguf", 'wb') as f:
+                f.write(model_part.getvalue())
+            print(f"Parte {part_index} indexada y guardada")
     async def generate_response(self, user_input):
         results = []