# CATSeg experiment overrides, applied on top of the base file named in _BASE_.
#
# NOTE(review): the nesting below was reconstructed from key order and the
# usual detectron2-style MODEL / SOLVER layout, because the previous revision
# of this file had no indentation and a stray "|" on every line. Confirm the
# placement of the SEM_SEG_HEAD children and of PROMPT_ENSEMBLE_TYPE against
# the project's config definitions before relying on it.
_BASE_: config.yaml

MODEL:
  META_ARCHITECTURE: "CATSeg"

  # ResNet backbone built by the named factory.
  BACKBONE:
    FREEZE_AT: 0
    NAME: "build_resnet_backbone"
  # Initial checkpoint for the model.
  WEIGHTS: "R-101.pkl"
  RESNETS:
    DEPTH: 101
    STEM_TYPE: "basic"
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: False
    # Must list every feature name that SEM_SEG_HEAD.IN_FEATURES consumes.
    OUT_FEATURES: ["res2", "res3", "res4"]

  # Per-channel input normalization constants (three channels).
  # Presumably on a 0-255 pixel scale -- TODO confirm channel order.
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]

  SEM_SEG_HEAD:
    NAME: "CATSegHead"
    IN_FEATURES: ["res2", "res3", "res4"]
    IGNORE_VALUE: 255
    NUM_CLASSES: 171
    # Class-name lists used for training and evaluation (same file here).
    TRAIN_CLASS_JSON: "datasets/coco.json"
    TEST_CLASS_JSON: "datasets/coco.json"
    CLIP_PRETRAINED: "ViT-B/16"
    PROMPT_DEPTH: 0
    PROMPT_LENGTH: 0
    TEXT_AFFINITY_DIM: 512
    TEXT_AFFINITY_PROJ_DIM: 128
    APPEARANCE_AFFINITY_DIM: 1024
    APPEARANCE_AFFINITY_PROJ_DIM: 128
    # The three DECODER_* lists each have two entries; presumably one entry
    # per decoder stage -- verify against the head implementation.
    DECODER_DIMS: [64, 32]
    DECODER_AFFINITY_DIMS: [512, 256]
    DECODER_AFFINITY_PROJ_DIMS: [32, 16]
    NUM_LAYERS: 2
    NUM_HEADS: 4
    HIDDEN_DIMS: 128
    POOLING_SIZES: [2, 2]
    FEATURE_RESOLUTION: [24, 24]
    WINDOW_SIZES: 12
    ATTENTION_TYPE: "linear"
    CLIP_FINETUNE: "attention"

  PROMPT_ENSEMBLE_TYPE: "imagenet"

SOLVER:
  # Scale factor for the backbone in the solver; presumably applied to the
  # backbone learning rate -- confirm in the optimizer-building code.
  BACKBONE_MULTIPLIER: 0.01