Commit 110ce35 · Parent(s): 4bb8b78
Update model configuration and enhance initialization logic; adjust BASE_DIR for container, implement model download functionality, and improve health check response
Files changed:
  app/config.py  +2 -5
  main.py        +58 -24
app/config.py CHANGED
@@ -1,16 +1,13 @@
 import os
 from pathlib import Path
 
-# Base project directory
-BASE_DIR = Path(
-
-# Model settings
+# Base project directory (adjusted for container environment)
+BASE_DIR = Path("/app")
 MODEL_DIR = BASE_DIR / "models"
 MODEL_NAME = "llama-2-7b-chat.q4_K_M.gguf"
 MODEL_PATH = MODEL_DIR / MODEL_NAME
 
 # Ensure model directory exists
 MODEL_DIR.mkdir(parents=True, exist_ok=True)
-
 # Model download URL
 MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
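With BASE_DIR pinned to /app, the derived paths become fixed absolute locations inside the container rather than depending on where the repository is checked out, and importing app.config creates the models directory as a side effect of the mkdir call. A minimal sanity check of what the constants now resolve to (a sketch, assuming it runs inside the container image):

from app.config import MODEL_DIR, MODEL_PATH

# The import above already ran MODEL_DIR.mkdir(parents=True, exist_ok=True),
# so /app/models exists by the time these print.
print(MODEL_DIR)    # /app/models
print(MODEL_PATH)   # /app/models/llama-2-7b-chat.q4_K_M.gguf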
main.py CHANGED
@@ -1,10 +1,10 @@
 from fastapi import FastAPI, HTTPException, status
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 from typing import Optional, List
 from ctransformers import AutoModelForCausalLM
 import time
 import logging
-from app.config import MODEL_PATH
+from app.config import MODEL_PATH, MODEL_URL
 
 # Configure logging
 logging.basicConfig(
@@ -49,25 +49,23 @@ class PoetryResponse(BaseModel):
     style: str
 
 class ModelInfo(BaseModel):
+    model_config = ConfigDict(protected_namespaces=())
+
     status: str
-    model_name: str
     model_path: str
+    model_name: str
     supported_styles: List[str]
     max_context_length: int
 
-@app.on_event("startup")
-async def startup_event():
-    """Load the model on startup"""
-    global model
-    try:
-        if not MODEL_PATH.exists():
-            raise FileNotFoundError(
-                f"Model file not found at {MODEL_PATH}. "
-                "Please run download_model.py first."
-            )
+def initialize_model():
+    """Initialize the model and return it"""
+    if not MODEL_PATH.exists():
+        logger.error(f"Model not found at {MODEL_PATH}")
+        return None
 
+    try:
         logger.info(f"Loading model from {MODEL_PATH}")
-        model = AutoModelForCausalLM.from_pretrained(
+        return AutoModelForCausalLM.from_pretrained(
             str(MODEL_PATH.parent),
             model_file=MODEL_PATH.name,
             model_type="llama",
@@ -75,10 +73,17 @@ async def startup_event():
             context_length=512,
             gpu_layers=0  # CPU only
         )
-        logger.info("Model loaded successfully")
     except Exception as e:
-        logger.error(f"
-
+        logger.error(f"Error loading model: {str(e)}")
+        return None
+
+@app.on_event("startup")
+async def startup_event():
+    """Initialize the model during startup"""
+    global model
+    model = initialize_model()
+    if model is None:
+        logger.warning("Model failed to load but service will start anyway")
 
 @app.get(
     "/health",
@@ -88,14 +93,10 @@ async def startup_event():
 )
 async def health_check():
     """Check if the model is loaded and get basic information"""
-    if model is None:
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Model not loaded"
-        )
+    model_status = "ready" if model is not None else "not_loaded"
 
     return ModelInfo(
-        status=
+        status=model_status,
         model_name="Llama-2-7B-Chat",
         model_path=str(MODEL_PATH),
         supported_styles=[
@@ -119,7 +120,7 @@ async def generate_poem(request: PoetryRequest):
     if model is None:
         raise HTTPException(
             status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Model not loaded"
+            detail="Model not loaded. Please check /health endpoint for status."
         )
 
     try:
@@ -159,6 +160,39 @@ async def generate_poem(request: PoetryRequest):
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail=f"Failed to generate poem: {str(e)}"
         )
+
+def download_model():
+    """Download the model if it doesn't exist"""
+    import requests
+    from tqdm import tqdm
+
+    if MODEL_PATH.exists():
+        logger.info(f"Model already exists at {MODEL_PATH}")
+        return
+
+    logger.info(f"Downloading model to {MODEL_PATH}")
+    try:
+        response = requests.get(MODEL_URL, stream=True)
+        response.raise_for_status()
+        total_size = int(response.headers.get('content-length', 0))
+
+        with open(MODEL_PATH, 'wb') as file, tqdm(
+            desc="Downloading",
+            total=total_size,
+            unit='iB',
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as pbar:
+            for data in response.iter_content(chunk_size=1024):
+                size = file.write(data)
+                pbar.update(size)
+
+        logger.info("Model downloaded successfully")
+    except Exception as e:
+        logger.error(f"Error downloading model: {str(e)}")
+        if MODEL_PATH.exists():
+            MODEL_PATH.unlink()
+        raise
 
 if __name__ == "__main__":
     import uvicorn
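With these changes the app boots even when the GGUF file is absent: startup logs a warning, /health reports the state, and the poem-generation endpoint returns 503 until the model is loaded. A quick smoke test of that behavior; this sketch assumes the service listens on the usual Hugging Face Spaces port 7860 and uses the httpx client, neither of which is part of this commit:

import httpx

# Before the weights exist, the service is up but degraded.
info = httpx.get("http://localhost:7860/health").json()
print(info["status"])   # "not_loaded" until the model file is present, "ready" after

download_model() can then be invoked manually (for example, python -c "from main import download_model; download_model()") and the service restarted, after which /health should report "ready".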