Aduc-sdr-2_5s

Paused

App Files Community

euIaxs22 committed on Oct 2

Commit

8623364

verified ·

1 Parent(s): b77d7cf

Update services/ltx_server.py

Browse files

Files changed (1) hide show

services/ltx_server.py +49 -64

services/ltx_server.py CHANGED Viewed

@@ -8,9 +8,7 @@ from typing import Optional, Tuple
 import torch
 from PIL import Image
-# Importa a função de fábrica do LTX-Video
-# A importação só funcionará depois que o repo for clonado e adicionado ao sys.path
-# Portanto, faremos a importação dentro do __init__
 APP_HOME = Path(os.environ.get("APP_HOME", "/app"))
@@ -27,33 +25,36 @@ class LTXServer:
     def __init__(self):
         if hasattr(self, '_initialized') and self._initialized: return
-        print("🚀 LTXServer (Manual Pipeline Assembly) inicializando...")
         self.OUTPUT_ROOT = APP_HOME / "outputs" / "ltx"
-        self.LTX_REPO_DIR = Path(os.getenv("LTX_REPO_DIR", "/data/LTX-Video"))
-        self.MODELS_DIR = Path("/data/ltx_models") # Um diretório unificado para todos os pesos
-        self.REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
-        self.CONFIG_PATH = APP_HOME / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml" # <--- Seu arquivo de config FP8
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.dtype = torch.bfloat16 if self.device == "cuda" and torch.cuda.is_bf16_supported() else torch.float16
-        for p in [self.LTX_REPO_DIR.parent, self.MODELS_DIR, self.OUTPUT_ROOT]:
             p.mkdir(parents=True, exist_ok=True)
         self.setup_dependencies()
-        # Importações dinâmicas após o setup
         from ltx_video.inference import create_ltx_video_pipeline, create_latent_upsampler
         try:
-            print("[LTXServer] Montando pipelines a partir dos arquivos baixados...")
             with open(self.CONFIG_PATH, "r") as f:
                 self.config_yaml = yaml.safe_load(f)
-            # Monta a pipeline principal
             self._pipeline = create_ltx_video_pipeline(
                 ckpt_path=str(self.MODELS_DIR / self.config_yaml["checkpoint_path"]),
                 precision=self.config_yaml["precision"],
@@ -68,7 +69,7 @@ class LTXServer:
                 device=self.device
             )
-            print("✅ LTXServer (Manual Assembly) pronto.")
         except Exception as e:
             print(f"ERRO CRÍTICO ao montar as pipelines LTX: {e}")
             raise
@@ -76,53 +77,47 @@ class LTXServer:
         self._initialized = True
     def setup_dependencies(self):
-        self._ensure_repo()
-        self._ensure_models()
-    def _ensure_repo(self) -> None:
-        if not (self.LTX_REPO_DIR / ".git").exists():
-            print(f"[LTXServer] Clonando repositório de '{self.REPO_URL}'...")
-            subprocess.run(["git", "clone", "--depth", "1", self.REPO_URL, str(self.LTX_REPO_DIR)], check=True)
-            # Instala o pacote localmente
-            print("[LTXServer] Instalando LTX-Video em modo editável...")
-            subprocess.run([sys.executable, "-m", "pip", "install", "-e", f"{self.LTX_REPO_DIR}[inference-script]"], check=True)
         else:
-            print("[LTXServer] Repositório LTX-Video já existe.")
         if str(self.LTX_REPO_DIR) not in sys.path:
             sys.path.insert(0, str(self.LTX_REPO_DIR))
-    def _ensure_models(self) -> None:
-        """Baixa todos os arquivos de modelo necessários para a pasta MODELS_DIR."""
-        from huggingface_hub import hf_hub_download
-        print(f"[LTXServer] Verificando arquivos de modelo em {self.MODELS_DIR}...")
-        # Lista de arquivos a serem baixados do repositório principal
-        files_to_download = [
-            "ltxv-13b-0.9.8-distilled-fp8.safetensors", # Modelo principal
-            "ltxv-spatial-upscaler-0.9.8.safetensors",  # Upscaler
-            # Componentes adicionais como VAE e Text Encoder serão baixados
-            # pela `create_ltx_video_pipeline` usando o cache do HF_HOME.
-        ]
-        for filename in files_to_download:
-            if not (self.MODELS_DIR / filename).exists():
-                print(f"Baixando {filename}...")
-                hf_hub_download(
-                    repo_id="Lightricks/LTX-Video",
-                    filename=filename,
-                    local_dir=str(self.MODELS_DIR),
-                    token=os.getenv("HF_TOKEN")
-                )
-        print("[LTXServer] Arquivos de modelo essenciais verificados/baixados.")
     def run_inference(self, **kwargs) -> str:
-        # Importa as classes necessárias aqui, pois o sys.path foi modificado no __init__
         from ltx_video.pipelines.pipeline_ltx_video import LTXMultiScalePipeline, ConditioningItem
-        from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-        # Extrai os parâmetros com valores padrão
         prompt = kwargs.get("prompt")
         image_path = kwargs.get("image_path")
         target_height = kwargs.get("target_height")
@@ -132,28 +127,20 @@ class LTXServer:
         output_file_path = self.OUTPUT_ROOT / f"run_{int(time.time())}.mp4"
         generator = torch.Generator(device=self.device).manual_seed(seed)
-        # Monta o objeto de pipeline multi-escala
         multi_scale_pipeline = LTXMultiScalePipeline(self._pipeline, self._latent_upsampler)
-        # Prepara a condição da imagem
         conditions = None
         if image_path:
-            from diffusers.utils import export_to_video, load_image, load_video
-            from ltx_video.pipelines.pipeline_ltx_condition import LTXVideoCondition
             image = load_image(image_path)
             video_condition_input = load_video(export_to_video([image]))
             condition = LTXVideoCondition(video=video_condition_input, frame_index=0)
             conditions = [condition]
-        # Configura os parâmetros da chamada com base no arquivo YAML
         call_kwargs = {
-            "prompt": prompt,
-            "negative_prompt": "worst quality...",
             "height": target_height, "width": target_width, "num_frames": num_frames,
-            "generator": generator, "output_type": "pt",
-            "conditions": conditions,
             "decode_timestep": self.config_yaml["decode_timestep"],
             "decode_noise_scale": self.config_yaml["decode_noise_scale"],
             "first_pass": self.config_yaml["first_pass"],
@@ -164,8 +151,6 @@ class LTXServer:
         print("[LTXServer] Executando pipeline multi-escala...")
         result_tensor = multi_scale_pipeline(**call_kwargs).images
-        # Exporta para vídeo
-        from diffusers.utils import export_to_video
         video_np = result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy()
         video_np = (video_np * 255).astype("uint8")
         export_to_video(video_np, str(output_file_path), fps=24)

 import torch
 from PIL import Image
+# Importações serão feitas dinamicamente após o setup
 APP_HOME = Path(os.environ.get("APP_HOME", "/app"))
     def __init__(self):
         if hasattr(self, '_initialized') and self._initialized: return
+        print("🚀 LTXServer (Full Cache) inicializando...")
         self.OUTPUT_ROOT = APP_HOME / "outputs" / "ltx"
+        self.LTX_REPO_DIR = Path("/opt/LTX-Video") # Instalado pelo Dockerfile
+        self.MODELS_DIR = Path("/data/ltx_models") # Pasta unificada para TODOS os modelos
+        self.CONFIG_PATH = APP_HOME / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+        self.HF_HOME_CACHE = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.dtype = torch.bfloat16 if self.device == "cuda" and torch.cuda.is_bf16_supported() else torch.float16
+        for p in [self.MODELS_DIR, self.OUTPUT_ROOT]:
             p.mkdir(parents=True, exist_ok=True)
         self.setup_dependencies()
+        # Importações dinâmicas
         from ltx_video.inference import create_ltx_video_pipeline, create_latent_upsampler
         try:
+            print("[LTXServer] Montando pipelines a partir dos arquivos locais...")
             with open(self.CONFIG_PATH, "r") as f:
                 self.config_yaml = yaml.safe_load(f)
+            # Para que a `create_ltx_video_pipeline` encontre os modelos,
+            # o `text_encoder_model_name_or_path` deve apontar para o nosso diretório local.
+            self.config_yaml["text_encoder_model_name_or_path"] = str(self.MODELS_DIR)
+            # Monta a pipeline principal, passando o caminho para os pesos e o diretório do text encoder
             self._pipeline = create_ltx_video_pipeline(
                 ckpt_path=str(self.MODELS_DIR / self.config_yaml["checkpoint_path"]),
                 precision=self.config_yaml["precision"],
                 device=self.device
             )
+            print("✅ LTXServer (Full Cache) pronto.")
         except Exception as e:
             print(f"ERRO CRÍTICO ao montar as pipelines LTX: {e}")
             raise
         self._initialized = True
     def setup_dependencies(self):
+        """Clona o repo (se Dockerfile não o fez) e baixa TODOS os modelos necessários."""
+        self._ensure_repo_and_install()
+        self._ensure_models_full_download()
+    def _ensure_repo_and_install(self) -> None:
+        """Clona e instala o repositório LTX-Video."""
+        if not (self.LTX_REPO_DIR / "setup.py").exists():
+             print(f"[LTXServer] Clonando repositório LTX-Video para {self.LTX_REPO_DIR}...")
+             subprocess.run(["git", "clone", "--depth", "1", "https://github.com/Lightricks/LTX-Video.git", str(self.LTX_REPO_DIR)], check=True)
+             print("[LTXServer] Instalando LTX-Video em modo editável...")
+             subprocess.run([sys.executable, "-m", "pip", "install", "-e", f"{self.LTX_REPO_DIR}[inference-script]"], check=True)
         else:
+            print("[LTXServer] Repositório LTX-Video já existe e está instalado.")
         if str(self.LTX_REPO_DIR) not in sys.path:
             sys.path.insert(0, str(self.LTX_REPO_DIR))
+    def _ensure_models_full_download(self) -> None:
+        """Baixa o snapshot completo de todos os modelos necessários para o cache local."""
+        from huggingface_hub import snapshot_download
+        print(f"[LTXServer] Verificando snapshot completo dos modelos em {self.MODELS_DIR}...")
+        # Baixa todos os arquivos do repositório Lightricks/LTX-Video
+        # A função snapshot_download é idempotente e usa cache.
+        snapshot_download(
+            repo_id="Lightricks/LTX-Video",
+            local_dir=str(self.MODELS_DIR),
+            cache_dir=str(self.HF_HOME_CACHE),
+            token=os.getenv("HF_TOKEN"),
+            # Padrões para garantir que baixamos tudo, incluindo VAE, text encoder e os pesos
+            allow_patterns=["*.safetensors", "*.json", "*.py", "text_encoder/*", "vae/*", "scheduler/*"],
+        )
+        print("[LTXServer] Snapshot completo dos modelos verificado/baixado.")
     def run_inference(self, **kwargs) -> str:
+        # A lógica de inferência permanece a mesma da resposta anterior,
+        # pois ela já usa as pipelines que inicializamos.
         from ltx_video.pipelines.pipeline_ltx_video import LTXMultiScalePipeline, ConditioningItem
+        from diffusers.utils import export_to_video, load_image, load_video
         prompt = kwargs.get("prompt")
         image_path = kwargs.get("image_path")
         target_height = kwargs.get("target_height")
         output_file_path = self.OUTPUT_ROOT / f"run_{int(time.time())}.mp4"
         generator = torch.Generator(device=self.device).manual_seed(seed)
         multi_scale_pipeline = LTXMultiScalePipeline(self._pipeline, self._latent_upsampler)
         conditions = None
         if image_path:
             image = load_image(image_path)
             video_condition_input = load_video(export_to_video([image]))
             condition = LTXVideoCondition(video=video_condition_input, frame_index=0)
             conditions = [condition]
         call_kwargs = {
+            "prompt": prompt, "negative_prompt": "worst quality...",
             "height": target_height, "width": target_width, "num_frames": num_frames,
+            "generator": generator, "output_type": "pt", "conditions": conditions,
             "decode_timestep": self.config_yaml["decode_timestep"],
             "decode_noise_scale": self.config_yaml["decode_noise_scale"],
             "first_pass": self.config_yaml["first_pass"],
         print("[LTXServer] Executando pipeline multi-escala...")
         result_tensor = multi_scale_pipeline(**call_kwargs).images
         video_np = result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy()
         video_np = (video_np * 255).astype("uint8")
         export_to_video(video_np, str(output_file_path), fps=24)