Whisper_speaker_diarization / core /cache_manager.py
vumichien's picture
refactor: replace ECAPA-TDNN clustering with pyannote 3.1, drop JP tabs
2b8c67d
"""Resolve model cache dir + clean up stale YouTube downloads."""
import os
import time
from pathlib import Path
# Base output directory, relative to the process working directory.
_OUTPUT_DIR = Path("output")
# Fallback model cache location used when MODEL_CACHE_DIR is not set.
_DEFAULT_CACHE = _OUTPUT_DIR / "cache" / "models"
# Default directory scanned by the stale YouTube download cleanup.
_YOUTUBE_DIR = _OUTPUT_DIR / "youtube"
def get_model_cache_dir():
    """Resolve the model cache directory and point framework caches at it.

    The directory is taken from the ``MODEL_CACHE_DIR`` environment variable
    when it is set and non-empty; a leading ``~`` is expanded to the user's
    home directory. Otherwise the module default (``output/cache/models``) is
    used. The directory is created if it does not exist yet.

    ``HF_HOME``, ``HUGGINGFACE_HUB_CACHE`` and ``TORCH_HOME`` are pointed at
    the absolute cache path, but only when they are not already defined, so
    values supplied by the surrounding environment are never overwritten.

    Returns:
        The cache directory as a ``Path``. It is not resolved, so it stays
        relative when the default or a relative ``MODEL_CACHE_DIR`` is used.

    Raises:
        OSError: If the directory cannot be created.
    """
    env_dir = os.getenv("MODEL_CACHE_DIR")
    # expanduser() keeps "~/models" from creating a literal "~" directory
    # under the current working directory.
    cache_dir = Path(env_dir).expanduser() if env_dir else _DEFAULT_CACHE
    cache_dir.mkdir(parents=True, exist_ok=True)

    cache_str = str(cache_dir.resolve())
    # setdefault: respect anything the user has already configured.
    for variable in ("HF_HOME", "HUGGINGFACE_HUB_CACHE", "TORCH_HOME"):
        os.environ.setdefault(variable, cache_str)
    return cache_dir
def get_output_dir():
    """Return the module-level base output directory path.

    The path is returned as stored; this function does not create it.
    """
    return _OUTPUT_DIR
def get_youtube_output_dir():
    """Return the module-level YouTube download directory path.

    The path is returned as stored; this function does not create it.
    """
    return _YOUTUBE_DIR
def ensure_dirs():
    """Create the output and YouTube directories if they are missing.

    Missing parents are created as needed and existing directories are left
    untouched.
    """
    for folder in (_OUTPUT_DIR, _YOUTUBE_DIR):
        folder.mkdir(parents=True, exist_ok=True)
def cleanup_old_videos(directory=None, max_age_days=7, min_age_seconds=3600):
    """Delete stale files that sit directly inside a download directory.

    Only regular files in the top level of the directory are considered;
    subdirectories are neither entered nor removed. A file is deleted when
    its modification time is more than ``max_age_days`` old. Files younger
    than ``min_age_seconds`` are always kept, whatever ``max_age_days`` is.

    Best-effort: failures are printed and never raised. This covers a
    ``directory`` that is not a directory or cannot be listed, as well as
    per-file stat/unlink errors. A directory that does not exist is
    silently ignored.

    Args:
        directory: Directory to scan. Falls back to the module's YouTube
            download directory when omitted or falsy.
        max_age_days: Age threshold, in days, above which a file is removed.
        min_age_seconds: Minimum age, in seconds, before a file may be
            removed.

    Returns:
        None.
    """
    target = Path(directory) if directory else _YOUTUBE_DIR

    try:
        # Take the listing inside the try so that a non-directory or
        # unreadable target is reported instead of raised.
        entries = list(target.iterdir())
    except FileNotFoundError:
        # Nothing to clean up.
        return
    except OSError as exc:
        print(f"[cache_manager] cleanup failed for {target}: {exc}")
        return

    now = time.time()
    max_age_seconds = max_age_days * 86400
    for entry in entries:
        try:
            if not entry.is_file():
                continue
            age = now - entry.stat().st_mtime
            if age < min_age_seconds:
                continue
            if age > max_age_seconds:
                entry.unlink()
                print(f"[cache_manager] removed stale download: {entry.name} (age {age/86400:.1f}d)")
        except OSError as exc:
            # Includes files that vanished between listing and stat/unlink.
            print(f"[cache_manager] cleanup failed for {entry}: {exc}")