{"image_encoder": {"img_size": 1024, "in_chans": 3, "num_classes": 1000, "embed_dims": [64, 128, 160, 320], "depths": [2, 2, 6, 2], "num_heads": [2, 4, 5, 10], "window_sizes": [7, 7, 14, 7], "mlp_ratio": 4.0, "drop_rate": 0.0, "drop_path_rate": 0.0, "use_checkpoint": false, "mbconv_expand_ratio": 4.0, "local_conv_size": 3, "layer_lr_decay": 0.8}, "prompt_encoder": {"embed_dim": 256, "image_embedding_size": [64, 64], "input_image_size": [1024, 1024], "mask_in_chans": 16}, "mask_decoder": {"transformer_dim": 256, "iou_head_depth": 3, "iou_head_hidden_dim": 256, "num_multimask_outputs": 3, "transformer": {"depth": 2, "embedding_dim": 256, "mlp_dim": 2048, "num_heads": 8}}, "pixel_mean": [123.675, 116.28, 103.53], "pixel_std": [58.395, 57.12, 57.375]}