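// NOTE: The first three lines of the captured text ("Spaces:",
// "Runtime error", "Runtime error") were hosting-page residue, not schema.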
// Copyright 2021 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// proto2 syntax: the messages in this file rely on explicit
// `[default = ...]` field values, which proto3 does not support.
syntax = "proto2";

package deeplab2;

// Emit one Java source file per top-level message instead of nesting all of
// them inside a single outer class.
option java_multiple_files = true;

/********** Submessages used to configure model options **********/

// Configure the decoder model options.
message DecoderOptions {
  // Set the features key for the high-level features, e.g. 'res5'.
  optional string feature_key = 1;

  // Set the number of filters in each convolution of the decoder.
  optional int32 decoder_channels = 2 [default = 256];

  // Set the decoder convolution type. Support 'depthwise_separable_conv' and
  // 'standard_conv'.
  optional string decoder_conv_type = 5
      [default = "depthwise_separable_conv"];

  // Set the number of filters in each convolution of the ASPP.
  optional int32 aspp_channels = 3 [default = 256];

  // Set the list of atrous rates used in the ASPP. Note that this field has
  // to be of length 3 (to specify the three 3x3 atrous convolutions in ASPP),
  // and it is effective only when `aspp_use_only_1x1_proj_conv` is false.
  repeated int32 atrous_rates = 4;

  // When true, the ASPP module uses only a 1x1 projection convolution: the
  // five ASPP branches (one 1x1 convolution, three 3x3 atrous convolutions
  // with the specified `atrous_rates`, and the global average pooling) are
  // turned off. This is equivalent to applying only one 1x1 convolution to
  // reduce the feature map channels (obtained from the encoder backbone) to
  // the specified `aspp_channels`. This field is mainly used (i.e., set to
  // true) when the encoder backbone is already able to efficiently capture
  // long-range information, e.g., by axial attention blocks (for reference,
  // see configs/cityscapes/axial_deeplab).
  optional bool aspp_use_only_1x1_proj_conv = 6 [default = false];
}

// Configure the low level features to use.
message LowLevelOptions {
  // Set the name of the low-level feature, e.g. 'res2'.
  optional string feature_key = 1;

  // Set the number of filters for the 1x1 projection convolution.
  optional int32 channels_project = 2;
}

// Configure the head options.
message HeadOptions {
  // Set the number of filters in the last convolution, e.g. 1 or NUM_CLASSES.
  optional int32 output_channels = 1;

  // Set the number of filters in the 5x5 convolution, e.g. 256 or 32.
  optional int32 head_channels = 2;

  // Set the head convolution type. Support 'depthwise_separable_conv' and
  // 'standard_conv'.
  optional string head_conv_type = 3 [default = "depthwise_separable_conv"];
}

// Configure the instance branch.
message InstanceOptions {
  // Set whether to use the instance branch.
  optional bool enable = 1 [default = true];

  // Set the low level options used in instance branch. The list of
  // LowLevelOptions must be ordered from lower resolution to higher
  // resolution. Leaving it empty will use the same low level options as the
  // semantic branch.
  repeated LowLevelOptions low_level_override = 2;

  // Set the decoder options of the instance branch. Leaving it empty will use
  // the same decoder options as the semantic branch.
  optional DecoderOptions instance_decoder_override = 3;

  // Configure instance center head.
  optional HeadOptions center_head = 4;

  // Configure instance regression head.
  optional HeadOptions regression_head = 5;

  // Configure next-frame instance regression head.
  optional HeadOptions next_regression_head = 6;
}

// Configure the model options.
// Next ID: 12
message ModelOptions {
  // Configure model backbone.
  message BackboneOptions {
    // Set the name of the specific architecture of the family.
    optional string name = 1 [default = "resnet50"];

    // Set the output stride of the encoder.
    optional int32 output_stride = 2 [default = 32];

    // Set the path from which to load pretrained weights.
    optional string pretrained_weights = 3;

    // Set whether to use the squeeze-and-excite operation.
    optional bool use_squeeze_and_excite = 4 [default = false];

    // Set the drop path keep probability for training. Default not to use.
    optional float drop_path_keep_prob = 5 [default = 1.0];

    // Set the drop path schedule. Currently support (1) 'constant': use the
    // same drop path probability for all blocks, and (2) 'linear': linearly
    // decrease the drop path probability from 1.0 at the 0-th stage (or STEM)
    // to drop_path_keep_prob at the last block.
    optional string drop_path_schedule = 6 [default = "constant"];

    // Set the STEM width_multiplier, controlling STEM convolution channels.
    optional float stem_width_multiplier = 7 [default = 1.0];

    // Set the backbone (except STEM) width_multiplier, controlling backbone
    // (except STEM) convolution channels.
    optional float backbone_width_multiplier = 8 [default = 1.0];

    // Set the backbone (except STEM) layer_multiplier, controlling the number
    // of layers in the backbone (except STEM).
    optional float backbone_layer_multiplier = 9 [default = 1.0];

    // Use the Switchable Atrous Convolution (SAC) beyond the specified stride.
    // For example, if use_sac_beyond_stride = 16, SAC will be applied to the
    // network stage whose original output stride >= 16 (i.e., 16 and 32, or
    // the last two stages). Set to -1 to disable it.
    optional int32 use_sac_beyond_stride = 10 [default = -1];
  }

  // Set the model option for the backbone encoder model.
  optional BackboneOptions backbone = 1;

  // Shared decoder settings across different meta architectures.
  optional DecoderOptions decoder = 2;

  // Meta-architecture specific settings.
  message DeeplabV3Options {
    // Set the number of classes for the last convolution to predict logits.
    optional int32 num_classes = 1;
  }

  message DeeplabV3PlusOptions {
    // Set the low level options used in this decoder. Unlike the other meta
    // architectures in this file, this is a single LowLevelOptions entry
    // rather than a list.
    // NOTE(review): the previous comment described an ordered list ("from
    // higher to lower levels"), which does not match this singular field —
    // confirm the intended usage.
    optional LowLevelOptions low_level = 1;

    // Set the number of classes for the last convolution to predict logits.
    optional int32 num_classes = 2;
  }

  message PanopticDeeplabOptions {
    // Set the low level options used in this decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions low_level = 1;

    // Set the model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Set the model options of the semantic head.
    optional HeadOptions semantic_head = 3;
  }

  message MotionDeepLabOptions {
    // Set the low level options used in this decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions low_level = 1;

    // Set the model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Set the model options of the semantic head.
    optional HeadOptions semantic_head = 3;

    // Set the model options for the motion head.
    optional HeadOptions motion_head = 4;
  }

  message MaXDeepLabOptions {
    // Set the head options of the mask head.
    optional HeadOptions pixel_space_head = 1;

    // Set the low level options used in the semantic decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions auxiliary_low_level = 2;

    // Set the head options of the semantic head.
    optional HeadOptions auxiliary_semantic_head = 3;
  }

  // Select the meta architecture. As a oneof, at most one of these fields is
  // set at a time; setting one clears any other.
  oneof meta_architecture {
    DeeplabV3Options deeplab_v3 = 3;
    DeeplabV3PlusOptions deeplab_v3_plus = 4;
    PanopticDeeplabOptions panoptic_deeplab = 5;
    MotionDeepLabOptions motion_deeplab = 7;
    MaXDeepLabOptions max_deeplab = 10;
    // Configured with the same message type as `panoptic_deeplab`.
    PanopticDeeplabOptions vip_deeplab = 11;
  }

  // Set the checkpoint to load.
  optional string initial_checkpoint = 6;

  // Set whether to restore the last convolution of the semantic head when
  // loading from the initial checkpoint. Setting this flag to false is useful
  // when an initial checkpoint was trained on a dataset with different
  // classes.
  optional bool restore_semantic_last_layer_from_initial_checkpoint = 8
      [default = true];

  // Set whether to restore the last convolution of the instance heads when
  // loading from the initial checkpoint. Depending on the meta architecture,
  // this includes center heatmap, center regression and motion regression.
  optional bool restore_instance_last_layer_from_initial_checkpoint = 9
      [default = true];
}