from typing import Dict, List, Tuple

import torch

from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# UI mode identifiers.
BOX_PROMPT_MODE = "box prompt"
MASK_GENERATION_MODE = "mask generation"
VIDEO_SEGMENTATION_MODE = "video segmentation"
# NOTE(review): VIDEO_SEGMENTATION_MODE is intentionally absent below —
# presumably not yet wired up; confirm against the caller.
MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE]

# Model-size key -> [hydra config name, checkpoint path].
CHECKPOINTS: Dict[str, List[str]] = {
    "tiny": ["sam2_hiera_t.yaml", "checkpoints/sam2_hiera_tiny.pt"],
    "small": ["sam2_hiera_s.yaml", "checkpoints/sam2_hiera_small.pt"],
    "base_plus": ["sam2_hiera_b+.yaml", "checkpoints/sam2_hiera_base_plus.pt"],
    "large": ["sam2_hiera_l.yaml", "checkpoints/sam2_hiera_large.pt"],
}
# Derived from CHECKPOINTS (dicts preserve insertion order) so the two
# can never drift apart.
CHECKPOINT_NAMES = list(CHECKPOINTS)


def load_models(
    device: torch.device,
) -> Tuple[Dict[str, SAM2ImagePredictor], Dict[str, SAM2AutomaticMaskGenerator]]:
    """Build every SAM2 variant and wrap each for both inference modes.

    Each checkpoint is loaded once and shared between its image predictor
    (box-prompt mode) and its automatic mask generator.

    Args:
        device: Device the models are built on (e.g. ``torch.device("cuda")``).

    Returns:
        A pair of dicts keyed by checkpoint name (``"tiny"``, ``"small"``,
        ``"base_plus"``, ``"large"``): image predictors and automatic
        mask generators.
    """
    image_predictors: Dict[str, SAM2ImagePredictor] = {}
    mask_generators: Dict[str, SAM2AutomaticMaskGenerator] = {}
    for key, (config, checkpoint) in CHECKPOINTS.items():
        model = build_sam2(config, checkpoint, device=device)
        image_predictors[key] = SAM2ImagePredictor(sam_model=model)
        # Generator settings are kept identical across model sizes.
        mask_generators[key] = SAM2AutomaticMaskGenerator(
            model=model,
            points_per_side=32,
            points_per_batch=64,
            pred_iou_thresh=0.7,
            stability_score_thresh=0.92,
            stability_score_offset=0.7,
            crop_n_layers=1,
            box_nms_thresh=0.7,
        )
    return image_predictors, mask_generators