Upload with huggingface_hub
Browse files
- pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log +0 -0
- pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log.json +0 -0
- pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/detr_pseudo_label.py +424 -0
- pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log +0 -0
- pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log.json +0 -0
- pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/final_model.pth +3 -0
- pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/selfsup_mask-rcnn_swin-b_simmim.py +447 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log +0 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log.json +0 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/final_model.pth +3 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/mask_rcnn.py +417 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log +0 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log.json +0 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/final_model.pth +3 -0
- pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/mask_rcnn.py +417 -0
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/detr_pseudo_label.py
ADDED
@@ -0,0 +1,424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Detector definition: a DETR model with a ResNet-50 backbone and a
# transformer head (6 encoder layers, 6 decoder layers) that predicts 256
# classes.  Reconstructed from the diff rendering; values are unchanged.
model = dict(
    type='DETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(3, ),
        frozen_stages=4,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    bbox_head=dict(
        type='DETRHead',
        num_classes=256,
        in_channels=2048,
        transformer=dict(
            type='Transformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1)
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DetrTransformerDecoder',
                return_intermediate=True,
                num_layers=6,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=dict(
                        type='MultiheadAttention',
                        embed_dims=256,
                        num_heads=8,
                        dropout=0.1),
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding', num_feats=128, normalize=True),
        loss_cls=dict(
            type='CrossEntropyLoss',
            bg_cls_weight=0.1,
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # The assigner cost weights (1.0 / 5.0 / 2.0) equal the loss weights
    # configured in bbox_head above.
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
|
66 |
+
# Dataset constants.  Reconstructed from the diff rendering; values are
# unchanged.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Per-channel mean/std used by the Normalize steps of the pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
70 |
+
train_pipeline = [
|
71 |
+
dict(type='LoadImageFromFile'),
|
72 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
73 |
+
dict(type='RandomFlip', flip_ratio=0.5),
|
74 |
+
dict(
|
75 |
+
type='AutoAugment',
|
76 |
+
policies=[[{
|
77 |
+
'type':
|
78 |
+
'Resize',
|
79 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333),
|
80 |
+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
|
81 |
+
(736, 1333), (768, 1333), (800, 1333)],
|
82 |
+
'multiscale_mode':
|
83 |
+
'value',
|
84 |
+
'keep_ratio':
|
85 |
+
True
|
86 |
+
}],
|
87 |
+
[{
|
88 |
+
'type': 'Resize',
|
89 |
+
'img_scale': [(400, 1333), (500, 1333), (600, 1333)],
|
90 |
+
'multiscale_mode': 'value',
|
91 |
+
'keep_ratio': True
|
92 |
+
}, {
|
93 |
+
'type': 'RandomCrop',
|
94 |
+
'crop_type': 'absolute_range',
|
95 |
+
'crop_size': (384, 600),
|
96 |
+
'allow_negative_crop': True
|
97 |
+
}, {
|
98 |
+
'type':
|
99 |
+
'Resize',
|
100 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333),
|
101 |
+
(576, 1333), (608, 1333), (640, 1333),
|
102 |
+
(672, 1333), (704, 1333), (736, 1333),
|
103 |
+
(768, 1333), (800, 1333)],
|
104 |
+
'multiscale_mode':
|
105 |
+
'value',
|
106 |
+
'override':
|
107 |
+
True,
|
108 |
+
'keep_ratio':
|
109 |
+
True
|
110 |
+
}]]),
|
111 |
+
dict(
|
112 |
+
type='Normalize',
|
113 |
+
mean=[123.675, 116.28, 103.53],
|
114 |
+
std=[58.395, 57.12, 57.375],
|
115 |
+
to_rgb=True),
|
116 |
+
dict(type='Pad', size_divisor=1),
|
117 |
+
dict(type='DefaultFormatBundle'),
|
118 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
119 |
+
]
|
120 |
+
# Test-time pipeline: a single scale (1333, 800) with flipping disabled.
# Reconstructed from the diff rendering; values are unchanged.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
|
139 |
+
data = dict(
|
140 |
+
samples_per_gpu=2,
|
141 |
+
workers_per_gpu=2,
|
142 |
+
train=dict(
|
143 |
+
type='CocoDataset',
|
144 |
+
ann_file='train2017_ratio3size0008@0.5_cluster-id-as-class.json',
|
145 |
+
img_prefix='data/coco/train2017/',
|
146 |
+
pipeline=[
|
147 |
+
dict(type='LoadImageFromFile'),
|
148 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
149 |
+
dict(type='RandomFlip', flip_ratio=0.5),
|
150 |
+
dict(
|
151 |
+
type='AutoAugment',
|
152 |
+
policies=[[{
|
153 |
+
'type':
|
154 |
+
'Resize',
|
155 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333),
|
156 |
+
(576, 1333), (608, 1333), (640, 1333),
|
157 |
+
(672, 1333), (704, 1333), (736, 1333),
|
158 |
+
(768, 1333), (800, 1333)],
|
159 |
+
'multiscale_mode':
|
160 |
+
'value',
|
161 |
+
'keep_ratio':
|
162 |
+
True
|
163 |
+
}],
|
164 |
+
[{
|
165 |
+
'type': 'Resize',
|
166 |
+
'img_scale': [(400, 1333), (500, 1333),
|
167 |
+
(600, 1333)],
|
168 |
+
'multiscale_mode': 'value',
|
169 |
+
'keep_ratio': True
|
170 |
+
}, {
|
171 |
+
'type': 'RandomCrop',
|
172 |
+
'crop_type': 'absolute_range',
|
173 |
+
'crop_size': (384, 600),
|
174 |
+
'allow_negative_crop': True
|
175 |
+
}, {
|
176 |
+
'type':
|
177 |
+
'Resize',
|
178 |
+
'img_scale': [(480, 1333), (512, 1333),
|
179 |
+
(544, 1333), (576, 1333),
|
180 |
+
(608, 1333), (640, 1333),
|
181 |
+
(672, 1333), (704, 1333),
|
182 |
+
(736, 1333), (768, 1333),
|
183 |
+
(800, 1333)],
|
184 |
+
'multiscale_mode':
|
185 |
+
'value',
|
186 |
+
'override':
|
187 |
+
True,
|
188 |
+
'keep_ratio':
|
189 |
+
True
|
190 |
+
}]]),
|
191 |
+
dict(
|
192 |
+
type='Normalize',
|
193 |
+
mean=[123.675, 116.28, 103.53],
|
194 |
+
std=[58.395, 57.12, 57.375],
|
195 |
+
to_rgb=True),
|
196 |
+
dict(type='Pad', size_divisor=1),
|
197 |
+
dict(type='DefaultFormatBundle'),
|
198 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
199 |
+
],
|
200 |
+
classes=[
|
201 |
+
'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'cluster_5',
|
202 |
+
'cluster_6', 'cluster_7', 'cluster_8', 'cluster_9', 'cluster_10',
|
203 |
+
'cluster_11', 'cluster_12', 'cluster_13', 'cluster_14',
|
204 |
+
'cluster_15', 'cluster_16', 'cluster_17', 'cluster_18',
|
205 |
+
'cluster_19', 'cluster_20', 'cluster_21', 'cluster_22',
|
206 |
+
'cluster_23', 'cluster_24', 'cluster_25', 'cluster_26',
|
207 |
+
'cluster_27', 'cluster_28', 'cluster_29', 'cluster_30',
|
208 |
+
'cluster_31', 'cluster_32', 'cluster_33', 'cluster_34',
|
209 |
+
'cluster_35', 'cluster_36', 'cluster_37', 'cluster_38',
|
210 |
+
'cluster_39', 'cluster_40', 'cluster_41', 'cluster_42',
|
211 |
+
'cluster_43', 'cluster_44', 'cluster_45', 'cluster_46',
|
212 |
+
'cluster_47', 'cluster_48', 'cluster_49', 'cluster_50',
|
213 |
+
'cluster_51', 'cluster_52', 'cluster_53', 'cluster_54',
|
214 |
+
'cluster_55', 'cluster_56', 'cluster_57', 'cluster_58',
|
215 |
+
'cluster_59', 'cluster_60', 'cluster_61', 'cluster_62',
|
216 |
+
'cluster_63', 'cluster_64', 'cluster_65', 'cluster_66',
|
217 |
+
'cluster_67', 'cluster_68', 'cluster_69', 'cluster_70',
|
218 |
+
'cluster_71', 'cluster_72', 'cluster_73', 'cluster_74',
|
219 |
+
'cluster_75', 'cluster_76', 'cluster_77', 'cluster_78',
|
220 |
+
'cluster_79', 'cluster_80', 'cluster_81', 'cluster_82',
|
221 |
+
'cluster_83', 'cluster_84', 'cluster_85', 'cluster_86',
|
222 |
+
'cluster_87', 'cluster_88', 'cluster_89', 'cluster_90',
|
223 |
+
'cluster_91', 'cluster_92', 'cluster_93', 'cluster_94',
|
224 |
+
'cluster_95', 'cluster_96', 'cluster_97', 'cluster_98',
|
225 |
+
'cluster_99', 'cluster_100', 'cluster_101', 'cluster_102',
|
226 |
+
'cluster_103', 'cluster_104', 'cluster_105', 'cluster_106',
|
227 |
+
'cluster_107', 'cluster_108', 'cluster_109', 'cluster_110',
|
228 |
+
'cluster_111', 'cluster_112', 'cluster_113', 'cluster_114',
|
229 |
+
'cluster_115', 'cluster_116', 'cluster_117', 'cluster_118',
|
230 |
+
'cluster_119', 'cluster_120', 'cluster_121', 'cluster_122',
|
231 |
+
'cluster_123', 'cluster_124', 'cluster_125', 'cluster_126',
|
232 |
+
'cluster_127', 'cluster_128', 'cluster_129', 'cluster_130',
|
233 |
+
'cluster_131', 'cluster_132', 'cluster_133', 'cluster_134',
|
234 |
+
'cluster_135', 'cluster_136', 'cluster_137', 'cluster_138',
|
235 |
+
'cluster_139', 'cluster_140', 'cluster_141', 'cluster_142',
|
236 |
+
'cluster_143', 'cluster_144', 'cluster_145', 'cluster_146',
|
237 |
+
'cluster_147', 'cluster_148', 'cluster_149', 'cluster_150',
|
238 |
+
'cluster_151', 'cluster_152', 'cluster_153', 'cluster_154',
|
239 |
+
'cluster_155', 'cluster_156', 'cluster_157', 'cluster_158',
|
240 |
+
'cluster_159', 'cluster_160', 'cluster_161', 'cluster_162',
|
241 |
+
'cluster_163', 'cluster_164', 'cluster_165', 'cluster_166',
|
242 |
+
'cluster_167', 'cluster_168', 'cluster_169', 'cluster_170',
|
243 |
+
'cluster_171', 'cluster_172', 'cluster_173', 'cluster_174',
|
244 |
+
'cluster_175', 'cluster_176', 'cluster_177', 'cluster_178',
|
245 |
+
'cluster_179', 'cluster_180', 'cluster_181', 'cluster_182',
|
246 |
+
'cluster_183', 'cluster_184', 'cluster_185', 'cluster_186',
|
247 |
+
'cluster_187', 'cluster_188', 'cluster_189', 'cluster_190',
|
248 |
+
'cluster_191', 'cluster_192', 'cluster_193', 'cluster_194',
|
249 |
+
'cluster_195', 'cluster_196', 'cluster_197', 'cluster_198',
|
250 |
+
'cluster_199', 'cluster_200', 'cluster_201', 'cluster_202',
|
251 |
+
'cluster_203', 'cluster_204', 'cluster_205', 'cluster_206',
|
252 |
+
'cluster_207', 'cluster_208', 'cluster_209', 'cluster_210',
|
253 |
+
'cluster_211', 'cluster_212', 'cluster_213', 'cluster_214',
|
254 |
+
'cluster_215', 'cluster_216', 'cluster_217', 'cluster_218',
|
255 |
+
'cluster_219', 'cluster_220', 'cluster_221', 'cluster_222',
|
256 |
+
'cluster_223', 'cluster_224', 'cluster_225', 'cluster_226',
|
257 |
+
'cluster_227', 'cluster_228', 'cluster_229', 'cluster_230',
|
258 |
+
'cluster_231', 'cluster_232', 'cluster_233', 'cluster_234',
|
259 |
+
'cluster_235', 'cluster_236', 'cluster_237', 'cluster_238',
|
260 |
+
'cluster_239', 'cluster_240', 'cluster_241', 'cluster_242',
|
261 |
+
'cluster_243', 'cluster_244', 'cluster_245', 'cluster_246',
|
262 |
+
'cluster_247', 'cluster_248', 'cluster_249', 'cluster_250',
|
263 |
+
'cluster_251', 'cluster_252', 'cluster_253', 'cluster_254',
|
264 |
+
'cluster_255', 'cluster_256'
|
265 |
+
]),
|
266 |
+
val=dict(
|
267 |
+
type='CocoDataset',
|
268 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
269 |
+
img_prefix='data/coco/val2017/',
|
270 |
+
pipeline=[
|
271 |
+
dict(type='LoadImageFromFile'),
|
272 |
+
dict(
|
273 |
+
type='MultiScaleFlipAug',
|
274 |
+
img_scale=(1333, 800),
|
275 |
+
flip=False,
|
276 |
+
transforms=[
|
277 |
+
dict(type='Resize', keep_ratio=True),
|
278 |
+
dict(type='RandomFlip'),
|
279 |
+
dict(
|
280 |
+
type='Normalize',
|
281 |
+
mean=[123.675, 116.28, 103.53],
|
282 |
+
std=[58.395, 57.12, 57.375],
|
283 |
+
to_rgb=True),
|
284 |
+
dict(type='Pad', size_divisor=32),
|
285 |
+
dict(type='ImageToTensor', keys=['img']),
|
286 |
+
dict(type='Collect', keys=['img'])
|
287 |
+
])
|
288 |
+
]),
|
289 |
+
test=dict(
|
290 |
+
type='CocoDataset',
|
291 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
292 |
+
img_prefix='data/coco/val2017/',
|
293 |
+
pipeline=[
|
294 |
+
dict(type='LoadImageFromFile'),
|
295 |
+
dict(
|
296 |
+
type='MultiScaleFlipAug',
|
297 |
+
img_scale=(1333, 800),
|
298 |
+
flip=False,
|
299 |
+
transforms=[
|
300 |
+
dict(type='Resize', keep_ratio=True),
|
301 |
+
dict(type='RandomFlip'),
|
302 |
+
dict(
|
303 |
+
type='Normalize',
|
304 |
+
mean=[123.675, 116.28, 103.53],
|
305 |
+
std=[58.395, 57.12, 57.375],
|
306 |
+
to_rgb=True),
|
307 |
+
dict(type='Pad', size_divisor=32),
|
308 |
+
dict(type='ImageToTensor', keys=['img']),
|
309 |
+
dict(type='Collect', keys=['img'])
|
310 |
+
])
|
311 |
+
]))
|
312 |
+
# Runtime settings (evaluation, checkpointing, logging, hooks, launcher).
# Reconstructed from the diff rendering; values are unchanged.

# NOTE(review): interval=65535 is far larger than a typical epoch count --
# presumably this is meant to disable periodic evaluation; confirm.
evaluation = dict(
    interval=65535, metric='bbox', save_best='auto', gpu_collect=True)
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [
    dict(type='NumClassCheckHook'),
    # NOTE(review): the W&B group is 'finetune' although this config sits
    # under a pretrain/ directory -- confirm the grouping is intended.
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='I2B', group='finetune'),
        interval=50,
        num_eval_images=0,
        log_checkpoint=False)
]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=True, base_batch_size=64)
|
333 |
+
# Extra modules listed for import alongside the config (22 entries);
# allow_failed_imports=False.  Reconstructed from the diff rendering;
# entries and their order are unchanged.
custom_imports = dict(
    imports=[
        'mmselfsup.datasets.pipelines',
        'selfsup.core.hook.momentum_update_hook',
        'selfsup.datasets.pipelines.selfsup_pipelines',
        'selfsup.datasets.pipelines.rand_aug',
        'selfsup.datasets.single_view_coco',
        'selfsup.datasets.multi_view_coco',
        'selfsup.models.losses.contrastive_loss',
        'selfsup.models.dense_heads.fcos_head',
        'selfsup.models.dense_heads.retina_head',
        'selfsup.models.dense_heads.detr_head',
        'selfsup.models.dense_heads.deformable_detr_head',
        'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
        'selfsup.models.roi_heads.standard_roi_head',
        'selfsup.models.detectors.selfsup_detector',
        'selfsup.models.detectors.selfsup_fcos',
        'selfsup.models.detectors.selfsup_detr',
        'selfsup.models.detectors.selfsup_deformable_detr',
        'selfsup.models.detectors.selfsup_retinanet',
        'selfsup.models.detectors.selfsup_mask_rcnn',
        'selfsup.core.bbox.assigners.hungarian_assigner',
        'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
        'selfsup.core.bbox.match_costs.match_cost'
    ],
    allow_failed_imports=False)
|
359 |
+
# Pseudo-label class names 'cluster_1' .. 'cluster_256' (256 entries, in
# numeric order).  Generated rather than spelled out; the resulting list is
# identical to the hand-written one.
classes = ['cluster_{}'.format(i) for i in range(1, 257)]
|
413 |
+
# Optimizer, schedule and run settings.  Reconstructed from the diff
# rendering; values are unchanged.
optimizer = dict(
    type='AdamW',
    lr=0.0002,
    weight_decay=0.0001,
    # Parameters matching the 'backbone' key get lr_mult=0 and decay_mult=0.
    paramwise_cfg=dict(
        custom_keys=dict(backbone=dict(lr_mult=0, decay_mult=0))))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# Step policy with a single step at 40; the runner below stops at 50 epochs.
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=50)
work_dir = 'work_dirs/selfsup_detr_cluster-ids-as-pseudo-labels'
auto_resume = False
# ids 0..31 (32 entries)
gpu_ids = range(0, 32)
|
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/final_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7aca88dfee95a9cb04041b5b93a19169aaa3bb14ff12c237042bc981205d85ab
|
3 |
+
size 422177783
|
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/selfsup_mask-rcnn_swin-b_simmim.py
ADDED
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Self-supervised detector: a 'SelfSupDetector' wrapper whose ``backbone``
# entry is itself a full 'SelfSupMaskRCNN' config (Swin backbone, FPN neck,
# RPN head, RoI head with no mask branch).  train_cfg, test_cfg and the
# outer init_cfg all sit inside that inner detector config.
# Reconstructed from the diff rendering; values are unchanged.
model = dict(
    type='SelfSupDetector',
    backbone=dict(
        type='SelfSupMaskRCNN',
        backbone=dict(
            type='SwinTransformer',
            embed_dims=128,
            depths=[2, 2, 18, 2],
            num_heads=[4, 8, 16, 32],
            window_size=7,
            mlp_ratio=4,
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.0,
            attn_drop_rate=0.0,
            drop_path_rate=0.2,
            patch_norm=True,
            out_indices=(0, 1, 2, 3),
            with_cp=False,
            frozen_stages=4,
            convert_weights=True,
            init_cfg=dict(
                type='Pretrained',
                checkpoint=
                'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth'
            )),
        neck=dict(
            type='FPN',
            in_channels=[128, 256, 512, 1024],
            out_channels=256,
            num_outs=5),
        rpn_head=dict(
            type='RPNHead',
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[1.0, 1.0, 1.0, 1.0]),
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        roi_head=dict(
            type='SelfSupStandardRoIHead',
            bbox_roi_extractor=dict(
                type='SingleRoIExtractor',
                roi_layer=dict(
                    type='RoIAlign', output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32]),
            bbox_head=dict(
                type='SelfSupShared4Conv1FCBBoxHead',
                in_channels=256,
                num_classes=256,
                roi_feat_size=7,
                reg_class_agnostic=False,
                loss_bbox=dict(type='L1Loss', loss_weight=1.0),
                loss_cls=dict(
                    type='ContrastiveLoss', loss_weight=1.0, temperature=0.5)),
            # No mask branch is configured.
            mask_roi_extractor=None,
            mask_head=None),
        train_cfg=dict(
            rpn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=4096,
                    pos_fraction=1.0,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False),
                allowed_border=-1,
                pos_weight=-1,
                debug=False),
            rpn_proposal=dict(
                nms_pre=2000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                    gt_max_assign_all=False),
                sampler=dict(
                    type='RandomSampler',
                    num=4096,
                    pos_fraction=1,
                    neg_pos_ub=0,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False)),
        test_cfg=dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                score_thr=0.05,
                nms=dict(type='nms', iou_threshold=0.5),
                max_per_img=100,
                mask_thr_binary=0.5)),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='pretrain/simmim_swin-b_mmselfsup-pretrain.pth')))
|
122 |
+
# Dataset constants.  Reconstructed from the diff rendering; values are
# unchanged.
train_dataset_type = 'MultiViewCocoDataset'
test_dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Single class name used for the training annotations.
classes = ['selective_search']
# Per-channel mean/std used by the Normalize steps of the pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
128 |
+
# Loading steps: read the image, then load box annotations only
# (with_mask=False).  Reconstructed from the diff rendering; values are
# unchanged.
load_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
]
|
132 |
+
# Augmentation pipeline for the first view: multi-scale resize, annotation
# filtering, padding, flip, one transform chosen from the OneOf list, then
# normalisation and formatting.  Reconstructed from the diff rendering;
# values are unchanged.
train_pipeline1 = [
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
    dict(type='Pad', size_divisor=32),
    dict(type='RandFlip', flip_ratio=0.5),
    dict(
        type='OneOf',
        transforms=[
            dict(type='Identity'),
            dict(type='AutoContrast'),
            dict(type='RandEqualize'),
            dict(type='RandSolarize'),
            dict(type='RandColor'),
            dict(type='RandContrast'),
            dict(type='RandBrightness'),
            dict(type='RandSharpness'),
            dict(type='RandPosterize')
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
|
163 |
+
# Augmentation pipeline for the second view; its steps and parameters are
# the same as those of the first-view pipeline.  Reconstructed from the
# diff rendering; values are unchanged.
train_pipeline2 = [
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
    dict(type='Pad', size_divisor=32),
    dict(type='RandFlip', flip_ratio=0.5),
    dict(
        type='OneOf',
        transforms=[
            dict(type='Identity'),
            dict(type='AutoContrast'),
            dict(type='RandEqualize'),
            dict(type='RandSolarize'),
            dict(type='RandColor'),
            dict(type='RandContrast'),
            dict(type='RandBrightness'),
            dict(type='RandSharpness'),
            dict(type='RandPosterize')
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
|
194 |
+
# Test-time pipeline: a single scale (1333, 800) with flipping disabled.
# Reconstructed from the diff rendering; values are unchanged.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
|
213 |
+
data = dict(
|
214 |
+
samples_per_gpu=4,
|
215 |
+
workers_per_gpu=2,
|
216 |
+
train=dict(
|
217 |
+
type='MultiViewCocoDataset',
|
218 |
+
dataset=dict(
|
219 |
+
type='CocoDataset',
|
220 |
+
classes=['selective_search'],
|
221 |
+
ann_file=
|
222 |
+
'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
|
223 |
+
img_prefix='data/coco/train2017/',
|
224 |
+
pipeline=[
|
225 |
+
dict(type='LoadImageFromFile'),
|
226 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
|
227 |
+
]),
|
228 |
+
num_views=2,
|
229 |
+
pipelines=[[{
|
230 |
+
'type':
|
231 |
+
'Resize',
|
232 |
+
'img_scale': [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
233 |
+
(1333, 768), (1333, 800)],
|
234 |
+
'multiscale_mode':
|
235 |
+
'value',
|
236 |
+
'keep_ratio':
|
237 |
+
True
|
238 |
+
}, {
|
239 |
+
'type': 'FilterAnnotations',
|
240 |
+
'min_gt_bbox_wh': (0.01, 0.01)
|
241 |
+
}, {
|
242 |
+
'type': 'Pad',
|
243 |
+
'size_divisor': 32
|
244 |
+
}, {
|
245 |
+
'type': 'RandFlip',
|
246 |
+
'flip_ratio': 0.5
|
247 |
+
}, {
|
248 |
+
'type':
|
249 |
+
'OneOf',
|
250 |
+
'transforms': [{
|
251 |
+
'type': 'Identity'
|
252 |
+
}, {
|
253 |
+
'type': 'AutoContrast'
|
254 |
+
}, {
|
255 |
+
'type': 'RandEqualize'
|
256 |
+
}, {
|
257 |
+
'type': 'RandSolarize'
|
258 |
+
}, {
|
259 |
+
'type': 'RandColor'
|
260 |
+
}, {
|
261 |
+
'type': 'RandContrast'
|
262 |
+
}, {
|
263 |
+
'type': 'RandBrightness'
|
264 |
+
}, {
|
265 |
+
'type': 'RandSharpness'
|
266 |
+
}, {
|
267 |
+
'type': 'RandPosterize'
|
268 |
+
}]
|
269 |
+
}, {
|
270 |
+
'type': 'Normalize',
|
271 |
+
'mean': [123.675, 116.28, 103.53],
|
272 |
+
'std': [58.395, 57.12, 57.375],
|
273 |
+
'to_rgb': True
|
274 |
+
}, {
|
275 |
+
'type': 'DefaultFormatBundle'
|
276 |
+
}, {
|
277 |
+
'type': 'Collect',
|
278 |
+
'keys': ['img', 'gt_bboxes', 'gt_labels']
|
279 |
+
}],
|
280 |
+
[{
|
281 |
+
'type':
|
282 |
+
'Resize',
|
283 |
+
'img_scale': [(1333, 640), (1333, 672), (1333, 704),
|
284 |
+
(1333, 736), (1333, 768), (1333, 800)],
|
285 |
+
'multiscale_mode':
|
286 |
+
'value',
|
287 |
+
'keep_ratio':
|
288 |
+
True
|
289 |
+
}, {
|
290 |
+
'type': 'FilterAnnotations',
|
291 |
+
'min_gt_bbox_wh': (0.01, 0.01)
|
292 |
+
}, {
|
293 |
+
'type': 'Pad',
|
294 |
+
'size_divisor': 32
|
295 |
+
}, {
|
296 |
+
'type': 'RandFlip',
|
297 |
+
'flip_ratio': 0.5
|
298 |
+
}, {
|
299 |
+
'type':
|
300 |
+
'OneOf',
|
301 |
+
'transforms': [{
|
302 |
+
'type': 'Identity'
|
303 |
+
}, {
|
304 |
+
'type': 'AutoContrast'
|
305 |
+
}, {
|
306 |
+
'type': 'RandEqualize'
|
307 |
+
}, {
|
308 |
+
'type': 'RandSolarize'
|
309 |
+
}, {
|
310 |
+
'type': 'RandColor'
|
311 |
+
}, {
|
312 |
+
'type': 'RandContrast'
|
313 |
+
}, {
|
314 |
+
'type': 'RandBrightness'
|
315 |
+
}, {
|
316 |
+
'type': 'RandSharpness'
|
317 |
+
}, {
|
318 |
+
'type': 'RandPosterize'
|
319 |
+
}]
|
320 |
+
}, {
|
321 |
+
'type': 'Normalize',
|
322 |
+
'mean': [123.675, 116.28, 103.53],
|
323 |
+
'std': [58.395, 57.12, 57.375],
|
324 |
+
'to_rgb': True
|
325 |
+
}, {
|
326 |
+
'type': 'DefaultFormatBundle'
|
327 |
+
}, {
|
328 |
+
'type': 'Collect',
|
329 |
+
'keys': ['img', 'gt_bboxes', 'gt_labels']
|
330 |
+
}]]),
|
331 |
+
val=dict(
|
332 |
+
type='CocoDataset',
|
333 |
+
classes=['selective_search'],
|
334 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
335 |
+
img_prefix='data/coco/val2017/',
|
336 |
+
pipeline=[
|
337 |
+
dict(type='LoadImageFromFile'),
|
338 |
+
dict(
|
339 |
+
type='MultiScaleFlipAug',
|
340 |
+
img_scale=(1333, 800),
|
341 |
+
flip=False,
|
342 |
+
transforms=[
|
343 |
+
dict(type='Resize', keep_ratio=True),
|
344 |
+
dict(type='RandomFlip'),
|
345 |
+
dict(
|
346 |
+
type='Normalize',
|
347 |
+
mean=[123.675, 116.28, 103.53],
|
348 |
+
std=[58.395, 57.12, 57.375],
|
349 |
+
to_rgb=True),
|
350 |
+
dict(type='Pad', size_divisor=32),
|
351 |
+
dict(type='ImageToTensor', keys=['img']),
|
352 |
+
dict(type='Collect', keys=['img'])
|
353 |
+
])
|
354 |
+
]),
|
355 |
+
test=dict(
|
356 |
+
type='CocoDataset',
|
357 |
+
classes=['selective_search'],
|
358 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
359 |
+
img_prefix='data/coco/val2017/',
|
360 |
+
pipeline=[
|
361 |
+
dict(type='LoadImageFromFile'),
|
362 |
+
dict(
|
363 |
+
type='MultiScaleFlipAug',
|
364 |
+
img_scale=(1333, 800),
|
365 |
+
flip=False,
|
366 |
+
transforms=[
|
367 |
+
dict(type='Resize', keep_ratio=True),
|
368 |
+
dict(type='RandomFlip'),
|
369 |
+
dict(
|
370 |
+
type='Normalize',
|
371 |
+
mean=[123.675, 116.28, 103.53],
|
372 |
+
std=[58.395, 57.12, 57.375],
|
373 |
+
to_rgb=True),
|
374 |
+
dict(type='Pad', size_divisor=32),
|
375 |
+
dict(type='ImageToTensor', keys=['img']),
|
376 |
+
dict(type='Collect', keys=['img'])
|
377 |
+
])
|
378 |
+
]))
|
379 |
+
evaluation = dict(interval=65535, gpu_collect=True, metric='bbox')
|
380 |
+
optimizer = dict(
|
381 |
+
type='AdamW',
|
382 |
+
lr=6e-05,
|
383 |
+
betas=(0.9, 0.999),
|
384 |
+
weight_decay=0.05,
|
385 |
+
paramwise_cfg=dict(
|
386 |
+
custom_keys=dict(
|
387 |
+
absolute_pos_embed=dict(decay_mult=0.0),
|
388 |
+
relative_position_bias_table=dict(decay_mult=0.0),
|
389 |
+
norm=dict(decay_mult=0.0))))
|
390 |
+
optimizer_config = dict(grad_clip=None)
|
391 |
+
lr_config = dict(
|
392 |
+
policy='step',
|
393 |
+
warmup='linear',
|
394 |
+
warmup_iters=1000,
|
395 |
+
warmup_ratio=0.001,
|
396 |
+
step=[8, 11])
|
397 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
398 |
+
checkpoint_config = dict(interval=1)
|
399 |
+
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
|
400 |
+
custom_hooks = [
|
401 |
+
dict(type='MomentumUpdateHook'),
|
402 |
+
dict(
|
403 |
+
type='MMDetWandbHook',
|
404 |
+
init_kwargs=dict(project='I2B', group='pretrain'),
|
405 |
+
interval=50,
|
406 |
+
num_eval_images=0,
|
407 |
+
log_checkpoint=False)
|
408 |
+
]
|
409 |
+
dist_params = dict(backend='nccl')
|
410 |
+
log_level = 'INFO'
|
411 |
+
load_from = None
|
412 |
+
resume_from = None
|
413 |
+
workflow = [('train', 1)]
|
414 |
+
opencv_num_threads = 0
|
415 |
+
mp_start_method = 'fork'
|
416 |
+
auto_scale_lr = dict(enable=True, base_batch_size=32)
|
417 |
+
custom_imports = dict(
|
418 |
+
imports=[
|
419 |
+
'mmselfsup.datasets.pipelines',
|
420 |
+
'selfsup.core.hook.momentum_update_hook',
|
421 |
+
'selfsup.datasets.pipelines.selfsup_pipelines',
|
422 |
+
'selfsup.datasets.pipelines.rand_aug',
|
423 |
+
'selfsup.datasets.single_view_coco',
|
424 |
+
'selfsup.datasets.multi_view_coco',
|
425 |
+
'selfsup.models.losses.contrastive_loss',
|
426 |
+
'selfsup.models.dense_heads.fcos_head',
|
427 |
+
'selfsup.models.dense_heads.retina_head',
|
428 |
+
'selfsup.models.dense_heads.detr_head',
|
429 |
+
'selfsup.models.dense_heads.deformable_detr_head',
|
430 |
+
'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
|
431 |
+
'selfsup.models.roi_heads.standard_roi_head',
|
432 |
+
'selfsup.models.detectors.selfsup_detector',
|
433 |
+
'selfsup.models.detectors.selfsup_fcos',
|
434 |
+
'selfsup.models.detectors.selfsup_detr',
|
435 |
+
'selfsup.models.detectors.selfsup_deformable_detr',
|
436 |
+
'selfsup.models.detectors.selfsup_retinanet',
|
437 |
+
'selfsup.models.detectors.selfsup_mask_rcnn',
|
438 |
+
'selfsup.core.bbox.assigners.hungarian_assigner',
|
439 |
+
'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
|
440 |
+
'selfsup.core.bbox.match_costs.match_cost'
|
441 |
+
],
|
442 |
+
allow_failed_imports=False)
|
443 |
+
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth'
|
444 |
+
find_unused_parameters = True
|
445 |
+
work_dir = 'work_dirs/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain'
|
446 |
+
auto_resume = False
|
447 |
+
gpu_ids = range(0, 8)
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/final_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7905e6dc0b5158a6f896575d62a3f3ed0c1dd9d7ff4bb8e923339c2b46b587c
|
3 |
+
size 170913485
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/mask_rcnn.py
ADDED
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model = dict(
|
2 |
+
type='SelfSupDetector',
|
3 |
+
backbone=dict(
|
4 |
+
type='SelfSupMaskRCNN',
|
5 |
+
backbone=dict(
|
6 |
+
type='ResNet',
|
7 |
+
depth=50,
|
8 |
+
num_stages=4,
|
9 |
+
out_indices=(0, 1, 2, 3),
|
10 |
+
frozen_stages=4,
|
11 |
+
norm_cfg=dict(type='BN', requires_grad=False),
|
12 |
+
norm_eval=True,
|
13 |
+
style='pytorch',
|
14 |
+
init_cfg=dict(
|
15 |
+
type='Pretrained',
|
16 |
+
checkpoint='pretrain/mocov2_resnet50_256bs-coslr-800e_in1k.pth'
|
17 |
+
)),
|
18 |
+
neck=dict(
|
19 |
+
type='FPN',
|
20 |
+
in_channels=[256, 512, 1024, 2048],
|
21 |
+
out_channels=256,
|
22 |
+
num_outs=5),
|
23 |
+
rpn_head=dict(
|
24 |
+
type='RPNHead',
|
25 |
+
in_channels=256,
|
26 |
+
feat_channels=256,
|
27 |
+
anchor_generator=dict(
|
28 |
+
type='AnchorGenerator',
|
29 |
+
scales=[8],
|
30 |
+
ratios=[0.5, 1.0, 2.0],
|
31 |
+
strides=[4, 8, 16, 32, 64]),
|
32 |
+
bbox_coder=dict(
|
33 |
+
type='DeltaXYWHBBoxCoder',
|
34 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
35 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
36 |
+
loss_cls=dict(
|
37 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
38 |
+
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
39 |
+
roi_head=dict(
|
40 |
+
type='SelfSupStandardRoIHead',
|
41 |
+
bbox_roi_extractor=dict(
|
42 |
+
type='SingleRoIExtractor',
|
43 |
+
roi_layer=dict(
|
44 |
+
type='RoIAlign', output_size=7, sampling_ratio=0),
|
45 |
+
out_channels=256,
|
46 |
+
featmap_strides=[4, 8, 16, 32]),
|
47 |
+
bbox_head=dict(
|
48 |
+
type='SelfSupShared4Conv1FCBBoxHead',
|
49 |
+
in_channels=256,
|
50 |
+
num_classes=256,
|
51 |
+
roi_feat_size=7,
|
52 |
+
loss_cls=dict(
|
53 |
+
type='ContrastiveLoss', loss_weight=1.0, temperature=0.2)),
|
54 |
+
mask_roi_extractor=None,
|
55 |
+
mask_head=None),
|
56 |
+
train_cfg=dict(
|
57 |
+
rpn=dict(
|
58 |
+
assigner=dict(
|
59 |
+
type='MaxIoUAssigner',
|
60 |
+
pos_iou_thr=0.7,
|
61 |
+
neg_iou_thr=0.3,
|
62 |
+
min_pos_iou=0.3,
|
63 |
+
match_low_quality=True,
|
64 |
+
ignore_iof_thr=-1),
|
65 |
+
sampler=dict(
|
66 |
+
type='RandomSampler',
|
67 |
+
num=4096,
|
68 |
+
pos_fraction=1.0,
|
69 |
+
neg_pos_ub=-1,
|
70 |
+
add_gt_as_proposals=False),
|
71 |
+
allowed_border=-1,
|
72 |
+
pos_weight=-1,
|
73 |
+
debug=False),
|
74 |
+
rpn_proposal=dict(
|
75 |
+
nms_pre=2000,
|
76 |
+
max_per_img=1000,
|
77 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
78 |
+
min_bbox_size=0),
|
79 |
+
rcnn=dict(
|
80 |
+
assigner=dict(
|
81 |
+
type='MaxIoUAssigner',
|
82 |
+
pos_iou_thr=0.5,
|
83 |
+
neg_iou_thr=0.5,
|
84 |
+
min_pos_iou=0.5,
|
85 |
+
match_low_quality=True,
|
86 |
+
ignore_iof_thr=-1,
|
87 |
+
gt_max_assign_all=False),
|
88 |
+
sampler=dict(
|
89 |
+
type='RandomSampler',
|
90 |
+
num=4096,
|
91 |
+
pos_fraction=1,
|
92 |
+
neg_pos_ub=0,
|
93 |
+
add_gt_as_proposals=True),
|
94 |
+
mask_size=28,
|
95 |
+
pos_weight=-1,
|
96 |
+
debug=False)),
|
97 |
+
test_cfg=dict(
|
98 |
+
rpn=dict(
|
99 |
+
nms_pre=1000,
|
100 |
+
max_per_img=1000,
|
101 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
102 |
+
min_bbox_size=0),
|
103 |
+
rcnn=dict(
|
104 |
+
score_thr=0.05,
|
105 |
+
nms=dict(type='nms', iou_threshold=0.5),
|
106 |
+
max_per_img=100,
|
107 |
+
mask_thr_binary=0.5))))
|
108 |
+
train_dataset_type = 'MultiViewCocoDataset'
|
109 |
+
test_dataset_type = 'CocoDataset'
|
110 |
+
data_root = 'data/coco/'
|
111 |
+
classes = ['selective_search']
|
112 |
+
img_norm_cfg = dict(
|
113 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
114 |
+
load_pipeline = [
|
115 |
+
dict(type='LoadImageFromFile'),
|
116 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
|
117 |
+
]
|
118 |
+
train_pipeline1 = [
|
119 |
+
dict(
|
120 |
+
type='Resize',
|
121 |
+
img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
122 |
+
(1333, 768), (1333, 800)],
|
123 |
+
multiscale_mode='value',
|
124 |
+
keep_ratio=True),
|
125 |
+
dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
|
126 |
+
dict(type='Pad', size_divisor=32),
|
127 |
+
dict(type='RandFlip', flip_ratio=0.5),
|
128 |
+
dict(
|
129 |
+
type='OneOf',
|
130 |
+
transforms=[
|
131 |
+
dict(type='Identity'),
|
132 |
+
dict(type='AutoContrast'),
|
133 |
+
dict(type='RandEqualize'),
|
134 |
+
dict(type='RandSolarize'),
|
135 |
+
dict(type='RandColor'),
|
136 |
+
dict(type='RandContrast'),
|
137 |
+
dict(type='RandBrightness'),
|
138 |
+
dict(type='RandSharpness'),
|
139 |
+
dict(type='RandPosterize')
|
140 |
+
]),
|
141 |
+
dict(
|
142 |
+
type='Normalize',
|
143 |
+
mean=[123.675, 116.28, 103.53],
|
144 |
+
std=[58.395, 57.12, 57.375],
|
145 |
+
to_rgb=True),
|
146 |
+
dict(type='DefaultFormatBundle'),
|
147 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
148 |
+
]
|
149 |
+
train_pipeline2 = [
|
150 |
+
dict(
|
151 |
+
type='Resize',
|
152 |
+
img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
153 |
+
(1333, 768), (1333, 800)],
|
154 |
+
multiscale_mode='value',
|
155 |
+
keep_ratio=True),
|
156 |
+
dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
|
157 |
+
dict(type='Pad', size_divisor=32),
|
158 |
+
dict(type='RandFlip', flip_ratio=0.5),
|
159 |
+
dict(
|
160 |
+
type='OneOf',
|
161 |
+
transforms=[
|
162 |
+
dict(type='Identity'),
|
163 |
+
dict(type='AutoContrast'),
|
164 |
+
dict(type='RandEqualize'),
|
165 |
+
dict(type='RandSolarize'),
|
166 |
+
dict(type='RandColor'),
|
167 |
+
dict(type='RandContrast'),
|
168 |
+
dict(type='RandBrightness'),
|
169 |
+
dict(type='RandSharpness'),
|
170 |
+
dict(type='RandPosterize')
|
171 |
+
]),
|
172 |
+
dict(
|
173 |
+
type='Normalize',
|
174 |
+
mean=[123.675, 116.28, 103.53],
|
175 |
+
std=[58.395, 57.12, 57.375],
|
176 |
+
to_rgb=True),
|
177 |
+
dict(type='DefaultFormatBundle'),
|
178 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
179 |
+
]
|
180 |
+
test_pipeline = [
|
181 |
+
dict(type='LoadImageFromFile'),
|
182 |
+
dict(
|
183 |
+
type='MultiScaleFlipAug',
|
184 |
+
img_scale=(1333, 800),
|
185 |
+
flip=False,
|
186 |
+
transforms=[
|
187 |
+
dict(type='Resize', keep_ratio=True),
|
188 |
+
dict(type='RandomFlip'),
|
189 |
+
dict(
|
190 |
+
type='Normalize',
|
191 |
+
mean=[123.675, 116.28, 103.53],
|
192 |
+
std=[58.395, 57.12, 57.375],
|
193 |
+
to_rgb=True),
|
194 |
+
dict(type='Pad', size_divisor=32),
|
195 |
+
dict(type='ImageToTensor', keys=['img']),
|
196 |
+
dict(type='Collect', keys=['img'])
|
197 |
+
])
|
198 |
+
]
|
199 |
+
data = dict(
|
200 |
+
samples_per_gpu=2,
|
201 |
+
workers_per_gpu=2,
|
202 |
+
train=dict(
|
203 |
+
type='MultiViewCocoDataset',
|
204 |
+
dataset=dict(
|
205 |
+
type='CocoDataset',
|
206 |
+
classes=['selective_search'],
|
207 |
+
ann_file=
|
208 |
+
'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
|
209 |
+
img_prefix='data/coco/train2017/',
|
210 |
+
pipeline=[
|
211 |
+
dict(type='LoadImageFromFile'),
|
212 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
|
213 |
+
]),
|
214 |
+
num_views=2,
|
215 |
+
pipelines=[[{
|
216 |
+
'type':
|
217 |
+
'Resize',
|
218 |
+
'img_scale': [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
219 |
+
(1333, 768), (1333, 800)],
|
220 |
+
'multiscale_mode':
|
221 |
+
'value',
|
222 |
+
'keep_ratio':
|
223 |
+
True
|
224 |
+
}, {
|
225 |
+
'type': 'FilterAnnotations',
|
226 |
+
'min_gt_bbox_wh': (0.01, 0.01)
|
227 |
+
}, {
|
228 |
+
'type': 'Pad',
|
229 |
+
'size_divisor': 32
|
230 |
+
}, {
|
231 |
+
'type': 'RandFlip',
|
232 |
+
'flip_ratio': 0.5
|
233 |
+
}, {
|
234 |
+
'type':
|
235 |
+
'OneOf',
|
236 |
+
'transforms': [{
|
237 |
+
'type': 'Identity'
|
238 |
+
}, {
|
239 |
+
'type': 'AutoContrast'
|
240 |
+
}, {
|
241 |
+
'type': 'RandEqualize'
|
242 |
+
}, {
|
243 |
+
'type': 'RandSolarize'
|
244 |
+
}, {
|
245 |
+
'type': 'RandColor'
|
246 |
+
}, {
|
247 |
+
'type': 'RandContrast'
|
248 |
+
}, {
|
249 |
+
'type': 'RandBrightness'
|
250 |
+
}, {
|
251 |
+
'type': 'RandSharpness'
|
252 |
+
}, {
|
253 |
+
'type': 'RandPosterize'
|
254 |
+
}]
|
255 |
+
}, {
|
256 |
+
'type': 'Normalize',
|
257 |
+
'mean': [123.675, 116.28, 103.53],
|
258 |
+
'std': [58.395, 57.12, 57.375],
|
259 |
+
'to_rgb': True
|
260 |
+
}, {
|
261 |
+
'type': 'DefaultFormatBundle'
|
262 |
+
}, {
|
263 |
+
'type': 'Collect',
|
264 |
+
'keys': ['img', 'gt_bboxes', 'gt_labels']
|
265 |
+
}],
|
266 |
+
[{
|
267 |
+
'type':
|
268 |
+
'Resize',
|
269 |
+
'img_scale': [(1333, 640), (1333, 672), (1333, 704),
|
270 |
+
(1333, 736), (1333, 768), (1333, 800)],
|
271 |
+
'multiscale_mode':
|
272 |
+
'value',
|
273 |
+
'keep_ratio':
|
274 |
+
True
|
275 |
+
}, {
|
276 |
+
'type': 'FilterAnnotations',
|
277 |
+
'min_gt_bbox_wh': (0.01, 0.01)
|
278 |
+
}, {
|
279 |
+
'type': 'Pad',
|
280 |
+
'size_divisor': 32
|
281 |
+
}, {
|
282 |
+
'type': 'RandFlip',
|
283 |
+
'flip_ratio': 0.5
|
284 |
+
}, {
|
285 |
+
'type':
|
286 |
+
'OneOf',
|
287 |
+
'transforms': [{
|
288 |
+
'type': 'Identity'
|
289 |
+
}, {
|
290 |
+
'type': 'AutoContrast'
|
291 |
+
}, {
|
292 |
+
'type': 'RandEqualize'
|
293 |
+
}, {
|
294 |
+
'type': 'RandSolarize'
|
295 |
+
}, {
|
296 |
+
'type': 'RandColor'
|
297 |
+
}, {
|
298 |
+
'type': 'RandContrast'
|
299 |
+
}, {
|
300 |
+
'type': 'RandBrightness'
|
301 |
+
}, {
|
302 |
+
'type': 'RandSharpness'
|
303 |
+
}, {
|
304 |
+
'type': 'RandPosterize'
|
305 |
+
}]
|
306 |
+
}, {
|
307 |
+
'type': 'Normalize',
|
308 |
+
'mean': [123.675, 116.28, 103.53],
|
309 |
+
'std': [58.395, 57.12, 57.375],
|
310 |
+
'to_rgb': True
|
311 |
+
}, {
|
312 |
+
'type': 'DefaultFormatBundle'
|
313 |
+
}, {
|
314 |
+
'type': 'Collect',
|
315 |
+
'keys': ['img', 'gt_bboxes', 'gt_labels']
|
316 |
+
}]]),
|
317 |
+
val=dict(
|
318 |
+
type='CocoDataset',
|
319 |
+
classes=['selective_search'],
|
320 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
321 |
+
img_prefix='data/coco/val2017/',
|
322 |
+
pipeline=[
|
323 |
+
dict(type='LoadImageFromFile'),
|
324 |
+
dict(
|
325 |
+
type='MultiScaleFlipAug',
|
326 |
+
img_scale=(1333, 800),
|
327 |
+
flip=False,
|
328 |
+
transforms=[
|
329 |
+
dict(type='Resize', keep_ratio=True),
|
330 |
+
dict(type='RandomFlip'),
|
331 |
+
dict(
|
332 |
+
type='Normalize',
|
333 |
+
mean=[123.675, 116.28, 103.53],
|
334 |
+
std=[58.395, 57.12, 57.375],
|
335 |
+
to_rgb=True),
|
336 |
+
dict(type='Pad', size_divisor=32),
|
337 |
+
dict(type='ImageToTensor', keys=['img']),
|
338 |
+
dict(type='Collect', keys=['img'])
|
339 |
+
])
|
340 |
+
]),
|
341 |
+
test=dict(
|
342 |
+
type='CocoDataset',
|
343 |
+
classes=['selective_search'],
|
344 |
+
ann_file='data/coco/annotations/instances_val2017.json',
|
345 |
+
img_prefix='data/coco/val2017/',
|
346 |
+
pipeline=[
|
347 |
+
dict(type='LoadImageFromFile'),
|
348 |
+
dict(
|
349 |
+
type='MultiScaleFlipAug',
|
350 |
+
img_scale=(1333, 800),
|
351 |
+
flip=False,
|
352 |
+
transforms=[
|
353 |
+
dict(type='Resize', keep_ratio=True),
|
354 |
+
dict(type='RandomFlip'),
|
355 |
+
dict(
|
356 |
+
type='Normalize',
|
357 |
+
mean=[123.675, 116.28, 103.53],
|
358 |
+
std=[58.395, 57.12, 57.375],
|
359 |
+
to_rgb=True),
|
360 |
+
dict(type='Pad', size_divisor=32),
|
361 |
+
dict(type='ImageToTensor', keys=['img']),
|
362 |
+
dict(type='Collect', keys=['img'])
|
363 |
+
])
|
364 |
+
]))
|
365 |
+
evaluation = dict(metric='bbox', interval=65535)
|
366 |
+
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
|
367 |
+
optimizer_config = dict(grad_clip=None)
|
368 |
+
lr_config = dict(
|
369 |
+
policy='step',
|
370 |
+
warmup='linear',
|
371 |
+
warmup_iters=500,
|
372 |
+
warmup_ratio=0.001,
|
373 |
+
step=[8, 11])
|
374 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
375 |
+
checkpoint_config = dict(interval=1)
|
376 |
+
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
|
377 |
+
custom_hooks = [
|
378 |
+
dict(type='MomentumUpdateHook'),
|
379 |
+
dict(
|
380 |
+
type='MMDetWandbHook',
|
381 |
+
init_kwargs=dict(project='mmdet_pretrain', group='pretrain'),
|
382 |
+
interval=50,
|
383 |
+
num_eval_images=0,
|
384 |
+
log_checkpoint=False)
|
385 |
+
]
|
386 |
+
dist_params = dict(backend='nccl')
|
387 |
+
log_level = 'INFO'
|
388 |
+
load_from = None
|
389 |
+
resume_from = None
|
390 |
+
workflow = [('train', 1)]
|
391 |
+
opencv_num_threads = 0
|
392 |
+
mp_start_method = 'fork'
|
393 |
+
auto_scale_lr = dict(enable=False, base_batch_size=16)
|
394 |
+
custom_imports = dict(
|
395 |
+
imports=[
|
396 |
+
'mmselfsup.core', 'mmselfsup.datasets.pipelines',
|
397 |
+
'selfsup.datasets.pipelines.selfsup_pipelines',
|
398 |
+
'selfsup.datasets.pipelines.rand_aug',
|
399 |
+
'selfsup.datasets.single_view_coco',
|
400 |
+
'selfsup.datasets.multi_view_coco',
|
401 |
+
'selfsup.models.losses.contrastive_loss',
|
402 |
+
'selfsup.models.dense_heads.fcos_head',
|
403 |
+
'selfsup.models.dense_heads.retina_head',
|
404 |
+
'selfsup.models.dense_heads.detr_head',
|
405 |
+
'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
|
406 |
+
'selfsup.models.roi_heads.standard_roi_head',
|
407 |
+
'selfsup.models.detectors.selfsup_detector',
|
408 |
+
'selfsup.models.detectors.selfsup_fcos',
|
409 |
+
'selfsup.models.detectors.selfsup_detr',
|
410 |
+
'selfsup.models.detectors.selfsup_retinanet',
|
411 |
+
'selfsup.models.detectors.selfsup_mask_rcnn',
|
412 |
+
'selfsup.core.bbox.match_costs.match_cost'
|
413 |
+
],
|
414 |
+
allow_failed_imports=False)
|
415 |
+
work_dir = 'work_dirs/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain'
|
416 |
+
auto_resume = False
|
417 |
+
gpu_ids = range(0, 8)
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/final_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4328af18c59d656037f81d1fd10de0878bc61849f1432e6adfe51edb37bf1bb
|
3 |
+
size 170913485
|
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/mask_rcnn.py
ADDED
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model = dict(
|
2 |
+
type='SelfSupDetector',
|
3 |
+
backbone=dict(
|
4 |
+
type='SelfSupMaskRCNN',
|
5 |
+
backbone=dict(
|
6 |
+
type='ResNet',
|
7 |
+
depth=50,
|
8 |
+
num_stages=4,
|
9 |
+
out_indices=(0, 1, 2, 3),
|
10 |
+
frozen_stages=4,
|
11 |
+
norm_cfg=dict(type='BN', requires_grad=False),
|
12 |
+
norm_eval=True,
|
13 |
+
style='pytorch',
|
14 |
+
init_cfg=dict(
|
15 |
+
type='Pretrained',
|
16 |
+
checkpoint='pretrain/pixpro_resnet50_8xb128-coslr-400e_in1k.pth'
|
17 |
+
)),
|
18 |
+
neck=dict(
|
19 |
+
type='FPN',
|
20 |
+
in_channels=[256, 512, 1024, 2048],
|
21 |
+
out_channels=256,
|
22 |
+
num_outs=5),
|
23 |
+
rpn_head=dict(
|
24 |
+
type='RPNHead',
|
25 |
+
in_channels=256,
|
26 |
+
feat_channels=256,
|
27 |
+
anchor_generator=dict(
|
28 |
+
type='AnchorGenerator',
|
29 |
+
scales=[8],
|
30 |
+
ratios=[0.5, 1.0, 2.0],
|
31 |
+
strides=[4, 8, 16, 32, 64]),
|
32 |
+
bbox_coder=dict(
|
33 |
+
type='DeltaXYWHBBoxCoder',
|
34 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
35 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
36 |
+
loss_cls=dict(
|
37 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
38 |
+
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
39 |
+
roi_head=dict(
|
40 |
+
type='SelfSupStandardRoIHead',
|
41 |
+
bbox_roi_extractor=dict(
|
42 |
+
type='SingleRoIExtractor',
|
43 |
+
roi_layer=dict(
|
44 |
+
type='RoIAlign', output_size=7, sampling_ratio=0),
|
45 |
+
out_channels=256,
|
46 |
+
featmap_strides=[4, 8, 16, 32]),
|
47 |
+
bbox_head=dict(
|
48 |
+
type='SelfSupShared4Conv1FCBBoxHead',
|
49 |
+
in_channels=256,
|
50 |
+
num_classes=256,
|
51 |
+
roi_feat_size=7,
|
52 |
+
loss_cls=dict(
|
53 |
+
type='ContrastiveLoss', loss_weight=1.0, temperature=0.2)),
|
54 |
+
mask_roi_extractor=None,
|
55 |
+
mask_head=None),
|
56 |
+
train_cfg=dict(
|
57 |
+
rpn=dict(
|
58 |
+
assigner=dict(
|
59 |
+
type='MaxIoUAssigner',
|
60 |
+
pos_iou_thr=0.7,
|
61 |
+
neg_iou_thr=0.3,
|
62 |
+
min_pos_iou=0.3,
|
63 |
+
match_low_quality=True,
|
64 |
+
ignore_iof_thr=-1),
|
65 |
+
sampler=dict(
|
66 |
+
type='RandomSampler',
|
67 |
+
num=4096,
|
68 |
+
pos_fraction=1.0,
|
69 |
+
neg_pos_ub=-1,
|
70 |
+
add_gt_as_proposals=False),
|
71 |
+
allowed_border=-1,
|
72 |
+
pos_weight=-1,
|
73 |
+
debug=False),
|
74 |
+
rpn_proposal=dict(
|
75 |
+
nms_pre=2000,
|
76 |
+
max_per_img=1000,
|
77 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
78 |
+
min_bbox_size=0),
|
79 |
+
rcnn=dict(
|
80 |
+
assigner=dict(
|
81 |
+
type='MaxIoUAssigner',
|
82 |
+
pos_iou_thr=0.5,
|
83 |
+
neg_iou_thr=0.5,
|
84 |
+
min_pos_iou=0.5,
|
85 |
+
match_low_quality=True,
|
86 |
+
ignore_iof_thr=-1,
|
87 |
+
gt_max_assign_all=False),
|
88 |
+
sampler=dict(
|
89 |
+
type='RandomSampler',
|
90 |
+
num=4096,
|
91 |
+
pos_fraction=1,
|
92 |
+
neg_pos_ub=0,
|
93 |
+
add_gt_as_proposals=True),
|
94 |
+
mask_size=28,
|
95 |
+
pos_weight=-1,
|
96 |
+
debug=False)),
|
97 |
+
test_cfg=dict(
|
98 |
+
rpn=dict(
|
99 |
+
nms_pre=1000,
|
100 |
+
max_per_img=1000,
|
101 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
102 |
+
min_bbox_size=0),
|
103 |
+
rcnn=dict(
|
104 |
+
score_thr=0.05,
|
105 |
+
nms=dict(type='nms', iou_threshold=0.5),
|
106 |
+
max_per_img=100,
|
107 |
+
mask_thr_binary=0.5))))
|
108 |
+
train_dataset_type = 'MultiViewCocoDataset'
|
109 |
+
test_dataset_type = 'CocoDataset'
|
110 |
+
data_root = 'data/coco/'
|
111 |
+
classes = ['selective_search']
|
112 |
+
img_norm_cfg = dict(
|
113 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
114 |
+
load_pipeline = [
|
115 |
+
dict(type='LoadImageFromFile'),
|
116 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
|
117 |
+
]
|
118 |
+
train_pipeline1 = [
|
119 |
+
dict(
|
120 |
+
type='Resize',
|
121 |
+
img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
|
122 |
+
(1333, 768), (1333, 800)],
|
123 |
+
multiscale_mode='value',
|
124 |
+
keep_ratio=True),
|
125 |
+
dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
|
126 |
+
dict(type='Pad', size_divisor=32),
|
127 |
+
dict(type='RandFlip', flip_ratio=0.5),
|
128 |
+
dict(
|
129 |
+
type='OneOf',
|
130 |
+
transforms=[
|
131 |
+
dict(type='Identity'),
|
132 |
+
dict(type='AutoContrast'),
|
133 |
+
dict(type='RandEqualize'),
|
134 |
+
dict(type='RandSolarize'),
|
135 |
+
dict(type='RandColor'),
|
136 |
+
dict(type='RandContrast'),
|
137 |
+
dict(type='RandBrightness'),
|
138 |
+
dict(type='RandSharpness'),
|
139 |
+
dict(type='RandPosterize')
|
140 |
+
]),
|
141 |
+
dict(
|
142 |
+
type='Normalize',
|
143 |
+
mean=[123.675, 116.28, 103.53],
|
144 |
+
std=[58.395, 57.12, 57.375],
|
145 |
+
to_rgb=True),
|
146 |
+
dict(type='DefaultFormatBundle'),
|
147 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
148 |
+
]
|
# Training augmentation pipeline #2 (presumably applied to the second of the
# two views declared in `data.train` -- confirm against MultiViewCocoDataset).
# Steps: multi-scale resize, annotation filtering, padding, random flip,
# a OneOf group of photometric transforms, normalisation, formatting, collect.
train_pipeline2 = [
    dict(
        type='Resize',
        # Candidate scales; multiscale_mode='value' is paired with this list.
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
    dict(type='Pad', size_divisor=32),
    dict(type='RandFlip', flip_ratio=0.5),
    dict(
        type='OneOf',
        # Selection semantics are defined by the project's OneOf transform.
        transforms=[
            dict(type='Identity'),
            dict(type='AutoContrast'),
            dict(type='RandEqualize'),
            dict(type='RandSolarize'),
            dict(type='RandColor'),
            dict(type='RandContrast'),
            dict(type='RandBrightness'),
            dict(type='RandSharpness'),
            dict(type='RandPosterize')
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
# Test-time pipeline: load the image, then run the nested transforms inside
# MultiScaleFlipAug at a single scale (1333, 800) with flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
# Dataloader/dataset configuration.
#   train: a MultiViewCocoDataset wrapping a CocoDataset built from the
#          filtered-proposals annotation file, with num_views=2 and one
#          augmentation pipeline per entry of `pipelines`.
#   val/test: plain CocoDataset on val2017 with the single-scale test pipeline.
# All pipelines are written out literally so this block does not depend on
# any other variable of the config.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MultiViewCocoDataset',
        dataset=dict(
            type='CocoDataset',
            classes=['selective_search'],
            ann_file=
            'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
            img_prefix='data/coco/train2017/',
            pipeline=[
                dict(type='LoadImageFromFile'),
                dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
            ]),
        num_views=2,
        # Two per-view pipelines; both entries hold identical settings.
        pipelines=[
            [
                dict(
                    type='Resize',
                    img_scale=[(1333, 640), (1333, 672), (1333, 704),
                               (1333, 736), (1333, 768), (1333, 800)],
                    multiscale_mode='value',
                    keep_ratio=True),
                dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
                dict(type='Pad', size_divisor=32),
                dict(type='RandFlip', flip_ratio=0.5),
                dict(
                    type='OneOf',
                    transforms=[
                        dict(type='Identity'),
                        dict(type='AutoContrast'),
                        dict(type='RandEqualize'),
                        dict(type='RandSolarize'),
                        dict(type='RandColor'),
                        dict(type='RandContrast'),
                        dict(type='RandBrightness'),
                        dict(type='RandSharpness'),
                        dict(type='RandPosterize')
                    ]),
                dict(
                    type='Normalize',
                    mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True),
                dict(type='DefaultFormatBundle'),
                dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
            ],
            [
                dict(
                    type='Resize',
                    img_scale=[(1333, 640), (1333, 672), (1333, 704),
                               (1333, 736), (1333, 768), (1333, 800)],
                    multiscale_mode='value',
                    keep_ratio=True),
                dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
                dict(type='Pad', size_divisor=32),
                dict(type='RandFlip', flip_ratio=0.5),
                dict(
                    type='OneOf',
                    transforms=[
                        dict(type='Identity'),
                        dict(type='AutoContrast'),
                        dict(type='RandEqualize'),
                        dict(type='RandSolarize'),
                        dict(type='RandColor'),
                        dict(type='RandContrast'),
                        dict(type='RandBrightness'),
                        dict(type='RandSharpness'),
                        dict(type='RandPosterize')
                    ]),
                dict(
                    type='Normalize',
                    mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True),
                dict(type='DefaultFormatBundle'),
                dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
            ]
        ]),
    val=dict(
        type='CocoDataset',
        classes=['selective_search'],
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        classes=['selective_search'],
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
# Evaluation: bbox metric. NOTE(review): interval=65535 is far larger than
# max_epochs=12 below, so evaluation presumably never fires during this run
# -- confirm this is intentional for pre-training.
evaluation = dict(metric='bbox', interval=65535)
# SGD optimiser with no gradient clipping.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# Step learning-rate policy with linear warmup; steps listed at 8 and 11.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
# Epoch-based training for 12 epochs.
runner = dict(type='EpochBasedRunner', max_epochs=12)
# Checkpointing and logging intervals.
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
# Extra hooks: a momentum-update hook and a Weights & Biases logging hook
# (project 'mmdet_pretrain', group 'pretrain', no eval images, no checkpoint
# upload).
custom_hooks = [
    dict(type='MomentumUpdateHook'),
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='mmdet_pretrain', group='pretrain'),
        interval=50,
        num_eval_images=0,
        log_checkpoint=False)
]
# Runtime settings.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=16)
# Modules listed for import by the config loader; presumably importing them
# registers the custom datasets, pipelines, heads, detectors, losses and
# match costs named elsewhere in this config -- confirm against the
# `selfsup` package. allow_failed_imports is set to False.
custom_imports = dict(
    imports=[
        'mmselfsup.core',
        'mmselfsup.datasets.pipelines',
        'selfsup.datasets.pipelines.selfsup_pipelines',
        'selfsup.datasets.pipelines.rand_aug',
        'selfsup.datasets.single_view_coco',
        'selfsup.datasets.multi_view_coco',
        'selfsup.models.losses.contrastive_loss',
        'selfsup.models.dense_heads.fcos_head',
        'selfsup.models.dense_heads.retina_head',
        'selfsup.models.dense_heads.detr_head',
        'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
        'selfsup.models.roi_heads.standard_roi_head',
        'selfsup.models.detectors.selfsup_detector',
        'selfsup.models.detectors.selfsup_fcos',
        'selfsup.models.detectors.selfsup_detr',
        'selfsup.models.detectors.selfsup_retinanet',
        'selfsup.models.detectors.selfsup_mask_rcnn',
        'selfsup.core.bbox.match_costs.match_cost'
    ],
    allow_failed_imports=False)
# Output directory for this run, resume flag, and the GPU id range (0..7).
work_dir = 'work_dirs/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain'
auto_resume = False
gpu_ids = range(0, 8)