limingcv committed on
Commit
b566f90
1 Parent(s): be33604

Upload with huggingface_hub

Browse files
Files changed (15) hide show
  1. pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log +0 -0
  2. pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log.json +0 -0
  3. pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/detr_pseudo_label.py +424 -0
  4. pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log +0 -0
  5. pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log.json +0 -0
  6. pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/final_model.pth +3 -0
  7. pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/selfsup_mask-rcnn_swin-b_simmim.py +447 -0
  8. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log +0 -0
  9. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log.json +0 -0
  10. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/final_model.pth +3 -0
  11. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/mask_rcnn.py +417 -0
  12. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log +0 -0
  13. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log.json +0 -0
  14. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/final_model.pth +3 -0
  15. pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/mask_rcnn.py +417 -0
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/20221026_193523.log.json ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_detr_cluster-ids-as-pseudo-labels/detr_pseudo_label.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='DETR',
3
+ backbone=dict(
4
+ type='ResNet',
5
+ depth=50,
6
+ num_stages=4,
7
+ out_indices=(3, ),
8
+ frozen_stages=4,
9
+ norm_cfg=dict(type='BN', requires_grad=False),
10
+ norm_eval=True,
11
+ style='pytorch',
12
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(
14
+ type='DETRHead',
15
+ num_classes=256,
16
+ in_channels=2048,
17
+ transformer=dict(
18
+ type='Transformer',
19
+ encoder=dict(
20
+ type='DetrTransformerEncoder',
21
+ num_layers=6,
22
+ transformerlayers=dict(
23
+ type='BaseTransformerLayer',
24
+ attn_cfgs=[
25
+ dict(
26
+ type='MultiheadAttention',
27
+ embed_dims=256,
28
+ num_heads=8,
29
+ dropout=0.1)
30
+ ],
31
+ feedforward_channels=2048,
32
+ ffn_dropout=0.1,
33
+ operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
34
+ decoder=dict(
35
+ type='DetrTransformerDecoder',
36
+ return_intermediate=True,
37
+ num_layers=6,
38
+ transformerlayers=dict(
39
+ type='DetrTransformerDecoderLayer',
40
+ attn_cfgs=dict(
41
+ type='MultiheadAttention',
42
+ embed_dims=256,
43
+ num_heads=8,
44
+ dropout=0.1),
45
+ feedforward_channels=2048,
46
+ ffn_dropout=0.1,
47
+ operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
48
+ 'ffn', 'norm')))),
49
+ positional_encoding=dict(
50
+ type='SinePositionalEncoding', num_feats=128, normalize=True),
51
+ loss_cls=dict(
52
+ type='CrossEntropyLoss',
53
+ bg_cls_weight=0.1,
54
+ use_sigmoid=False,
55
+ loss_weight=1.0,
56
+ class_weight=1.0),
57
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
58
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
59
+ train_cfg=dict(
60
+ assigner=dict(
61
+ type='HungarianAssigner',
62
+ cls_cost=dict(type='ClassificationCost', weight=1.0),
63
+ reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
64
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
65
+ test_cfg=dict(max_per_img=100))
66
+ dataset_type = 'CocoDataset'
67
+ data_root = 'data/coco/'
68
+ img_norm_cfg = dict(
69
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
70
+ train_pipeline = [
71
+ dict(type='LoadImageFromFile'),
72
+ dict(type='LoadAnnotations', with_bbox=True),
73
+ dict(type='RandomFlip', flip_ratio=0.5),
74
+ dict(
75
+ type='AutoAugment',
76
+ policies=[[{
77
+ 'type':
78
+ 'Resize',
79
+ 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333),
80
+ (608, 1333), (640, 1333), (672, 1333), (704, 1333),
81
+ (736, 1333), (768, 1333), (800, 1333)],
82
+ 'multiscale_mode':
83
+ 'value',
84
+ 'keep_ratio':
85
+ True
86
+ }],
87
+ [{
88
+ 'type': 'Resize',
89
+ 'img_scale': [(400, 1333), (500, 1333), (600, 1333)],
90
+ 'multiscale_mode': 'value',
91
+ 'keep_ratio': True
92
+ }, {
93
+ 'type': 'RandomCrop',
94
+ 'crop_type': 'absolute_range',
95
+ 'crop_size': (384, 600),
96
+ 'allow_negative_crop': True
97
+ }, {
98
+ 'type':
99
+ 'Resize',
100
+ 'img_scale': [(480, 1333), (512, 1333), (544, 1333),
101
+ (576, 1333), (608, 1333), (640, 1333),
102
+ (672, 1333), (704, 1333), (736, 1333),
103
+ (768, 1333), (800, 1333)],
104
+ 'multiscale_mode':
105
+ 'value',
106
+ 'override':
107
+ True,
108
+ 'keep_ratio':
109
+ True
110
+ }]]),
111
+ dict(
112
+ type='Normalize',
113
+ mean=[123.675, 116.28, 103.53],
114
+ std=[58.395, 57.12, 57.375],
115
+ to_rgb=True),
116
+ dict(type='Pad', size_divisor=1),
117
+ dict(type='DefaultFormatBundle'),
118
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
119
+ ]
120
+ test_pipeline = [
121
+ dict(type='LoadImageFromFile'),
122
+ dict(
123
+ type='MultiScaleFlipAug',
124
+ img_scale=(1333, 800),
125
+ flip=False,
126
+ transforms=[
127
+ dict(type='Resize', keep_ratio=True),
128
+ dict(type='RandomFlip'),
129
+ dict(
130
+ type='Normalize',
131
+ mean=[123.675, 116.28, 103.53],
132
+ std=[58.395, 57.12, 57.375],
133
+ to_rgb=True),
134
+ dict(type='Pad', size_divisor=32),
135
+ dict(type='ImageToTensor', keys=['img']),
136
+ dict(type='Collect', keys=['img'])
137
+ ])
138
+ ]
139
+ data = dict(
140
+ samples_per_gpu=2,
141
+ workers_per_gpu=2,
142
+ train=dict(
143
+ type='CocoDataset',
144
+ ann_file='train2017_ratio3size0008@0.5_cluster-id-as-class.json',
145
+ img_prefix='data/coco/train2017/',
146
+ pipeline=[
147
+ dict(type='LoadImageFromFile'),
148
+ dict(type='LoadAnnotations', with_bbox=True),
149
+ dict(type='RandomFlip', flip_ratio=0.5),
150
+ dict(
151
+ type='AutoAugment',
152
+ policies=[[{
153
+ 'type':
154
+ 'Resize',
155
+ 'img_scale': [(480, 1333), (512, 1333), (544, 1333),
156
+ (576, 1333), (608, 1333), (640, 1333),
157
+ (672, 1333), (704, 1333), (736, 1333),
158
+ (768, 1333), (800, 1333)],
159
+ 'multiscale_mode':
160
+ 'value',
161
+ 'keep_ratio':
162
+ True
163
+ }],
164
+ [{
165
+ 'type': 'Resize',
166
+ 'img_scale': [(400, 1333), (500, 1333),
167
+ (600, 1333)],
168
+ 'multiscale_mode': 'value',
169
+ 'keep_ratio': True
170
+ }, {
171
+ 'type': 'RandomCrop',
172
+ 'crop_type': 'absolute_range',
173
+ 'crop_size': (384, 600),
174
+ 'allow_negative_crop': True
175
+ }, {
176
+ 'type':
177
+ 'Resize',
178
+ 'img_scale': [(480, 1333), (512, 1333),
179
+ (544, 1333), (576, 1333),
180
+ (608, 1333), (640, 1333),
181
+ (672, 1333), (704, 1333),
182
+ (736, 1333), (768, 1333),
183
+ (800, 1333)],
184
+ 'multiscale_mode':
185
+ 'value',
186
+ 'override':
187
+ True,
188
+ 'keep_ratio':
189
+ True
190
+ }]]),
191
+ dict(
192
+ type='Normalize',
193
+ mean=[123.675, 116.28, 103.53],
194
+ std=[58.395, 57.12, 57.375],
195
+ to_rgb=True),
196
+ dict(type='Pad', size_divisor=1),
197
+ dict(type='DefaultFormatBundle'),
198
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
199
+ ],
200
+ classes=[
201
+ 'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'cluster_5',
202
+ 'cluster_6', 'cluster_7', 'cluster_8', 'cluster_9', 'cluster_10',
203
+ 'cluster_11', 'cluster_12', 'cluster_13', 'cluster_14',
204
+ 'cluster_15', 'cluster_16', 'cluster_17', 'cluster_18',
205
+ 'cluster_19', 'cluster_20', 'cluster_21', 'cluster_22',
206
+ 'cluster_23', 'cluster_24', 'cluster_25', 'cluster_26',
207
+ 'cluster_27', 'cluster_28', 'cluster_29', 'cluster_30',
208
+ 'cluster_31', 'cluster_32', 'cluster_33', 'cluster_34',
209
+ 'cluster_35', 'cluster_36', 'cluster_37', 'cluster_38',
210
+ 'cluster_39', 'cluster_40', 'cluster_41', 'cluster_42',
211
+ 'cluster_43', 'cluster_44', 'cluster_45', 'cluster_46',
212
+ 'cluster_47', 'cluster_48', 'cluster_49', 'cluster_50',
213
+ 'cluster_51', 'cluster_52', 'cluster_53', 'cluster_54',
214
+ 'cluster_55', 'cluster_56', 'cluster_57', 'cluster_58',
215
+ 'cluster_59', 'cluster_60', 'cluster_61', 'cluster_62',
216
+ 'cluster_63', 'cluster_64', 'cluster_65', 'cluster_66',
217
+ 'cluster_67', 'cluster_68', 'cluster_69', 'cluster_70',
218
+ 'cluster_71', 'cluster_72', 'cluster_73', 'cluster_74',
219
+ 'cluster_75', 'cluster_76', 'cluster_77', 'cluster_78',
220
+ 'cluster_79', 'cluster_80', 'cluster_81', 'cluster_82',
221
+ 'cluster_83', 'cluster_84', 'cluster_85', 'cluster_86',
222
+ 'cluster_87', 'cluster_88', 'cluster_89', 'cluster_90',
223
+ 'cluster_91', 'cluster_92', 'cluster_93', 'cluster_94',
224
+ 'cluster_95', 'cluster_96', 'cluster_97', 'cluster_98',
225
+ 'cluster_99', 'cluster_100', 'cluster_101', 'cluster_102',
226
+ 'cluster_103', 'cluster_104', 'cluster_105', 'cluster_106',
227
+ 'cluster_107', 'cluster_108', 'cluster_109', 'cluster_110',
228
+ 'cluster_111', 'cluster_112', 'cluster_113', 'cluster_114',
229
+ 'cluster_115', 'cluster_116', 'cluster_117', 'cluster_118',
230
+ 'cluster_119', 'cluster_120', 'cluster_121', 'cluster_122',
231
+ 'cluster_123', 'cluster_124', 'cluster_125', 'cluster_126',
232
+ 'cluster_127', 'cluster_128', 'cluster_129', 'cluster_130',
233
+ 'cluster_131', 'cluster_132', 'cluster_133', 'cluster_134',
234
+ 'cluster_135', 'cluster_136', 'cluster_137', 'cluster_138',
235
+ 'cluster_139', 'cluster_140', 'cluster_141', 'cluster_142',
236
+ 'cluster_143', 'cluster_144', 'cluster_145', 'cluster_146',
237
+ 'cluster_147', 'cluster_148', 'cluster_149', 'cluster_150',
238
+ 'cluster_151', 'cluster_152', 'cluster_153', 'cluster_154',
239
+ 'cluster_155', 'cluster_156', 'cluster_157', 'cluster_158',
240
+ 'cluster_159', 'cluster_160', 'cluster_161', 'cluster_162',
241
+ 'cluster_163', 'cluster_164', 'cluster_165', 'cluster_166',
242
+ 'cluster_167', 'cluster_168', 'cluster_169', 'cluster_170',
243
+ 'cluster_171', 'cluster_172', 'cluster_173', 'cluster_174',
244
+ 'cluster_175', 'cluster_176', 'cluster_177', 'cluster_178',
245
+ 'cluster_179', 'cluster_180', 'cluster_181', 'cluster_182',
246
+ 'cluster_183', 'cluster_184', 'cluster_185', 'cluster_186',
247
+ 'cluster_187', 'cluster_188', 'cluster_189', 'cluster_190',
248
+ 'cluster_191', 'cluster_192', 'cluster_193', 'cluster_194',
249
+ 'cluster_195', 'cluster_196', 'cluster_197', 'cluster_198',
250
+ 'cluster_199', 'cluster_200', 'cluster_201', 'cluster_202',
251
+ 'cluster_203', 'cluster_204', 'cluster_205', 'cluster_206',
252
+ 'cluster_207', 'cluster_208', 'cluster_209', 'cluster_210',
253
+ 'cluster_211', 'cluster_212', 'cluster_213', 'cluster_214',
254
+ 'cluster_215', 'cluster_216', 'cluster_217', 'cluster_218',
255
+ 'cluster_219', 'cluster_220', 'cluster_221', 'cluster_222',
256
+ 'cluster_223', 'cluster_224', 'cluster_225', 'cluster_226',
257
+ 'cluster_227', 'cluster_228', 'cluster_229', 'cluster_230',
258
+ 'cluster_231', 'cluster_232', 'cluster_233', 'cluster_234',
259
+ 'cluster_235', 'cluster_236', 'cluster_237', 'cluster_238',
260
+ 'cluster_239', 'cluster_240', 'cluster_241', 'cluster_242',
261
+ 'cluster_243', 'cluster_244', 'cluster_245', 'cluster_246',
262
+ 'cluster_247', 'cluster_248', 'cluster_249', 'cluster_250',
263
+ 'cluster_251', 'cluster_252', 'cluster_253', 'cluster_254',
264
+ 'cluster_255', 'cluster_256'
265
+ ]),
266
+ val=dict(
267
+ type='CocoDataset',
268
+ ann_file='data/coco/annotations/instances_val2017.json',
269
+ img_prefix='data/coco/val2017/',
270
+ pipeline=[
271
+ dict(type='LoadImageFromFile'),
272
+ dict(
273
+ type='MultiScaleFlipAug',
274
+ img_scale=(1333, 800),
275
+ flip=False,
276
+ transforms=[
277
+ dict(type='Resize', keep_ratio=True),
278
+ dict(type='RandomFlip'),
279
+ dict(
280
+ type='Normalize',
281
+ mean=[123.675, 116.28, 103.53],
282
+ std=[58.395, 57.12, 57.375],
283
+ to_rgb=True),
284
+ dict(type='Pad', size_divisor=32),
285
+ dict(type='ImageToTensor', keys=['img']),
286
+ dict(type='Collect', keys=['img'])
287
+ ])
288
+ ]),
289
+ test=dict(
290
+ type='CocoDataset',
291
+ ann_file='data/coco/annotations/instances_val2017.json',
292
+ img_prefix='data/coco/val2017/',
293
+ pipeline=[
294
+ dict(type='LoadImageFromFile'),
295
+ dict(
296
+ type='MultiScaleFlipAug',
297
+ img_scale=(1333, 800),
298
+ flip=False,
299
+ transforms=[
300
+ dict(type='Resize', keep_ratio=True),
301
+ dict(type='RandomFlip'),
302
+ dict(
303
+ type='Normalize',
304
+ mean=[123.675, 116.28, 103.53],
305
+ std=[58.395, 57.12, 57.375],
306
+ to_rgb=True),
307
+ dict(type='Pad', size_divisor=32),
308
+ dict(type='ImageToTensor', keys=['img']),
309
+ dict(type='Collect', keys=['img'])
310
+ ])
311
+ ]))
312
+ evaluation = dict(
313
+ interval=65535, metric='bbox', save_best='auto', gpu_collect=True)
314
+ checkpoint_config = dict(interval=1)
315
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
316
+ custom_hooks = [
317
+ dict(type='NumClassCheckHook'),
318
+ dict(
319
+ type='MMDetWandbHook',
320
+ init_kwargs=dict(project='I2B', group='finetune'),
321
+ interval=50,
322
+ num_eval_images=0,
323
+ log_checkpoint=False)
324
+ ]
325
+ dist_params = dict(backend='nccl')
326
+ log_level = 'INFO'
327
+ load_from = None
328
+ resume_from = None
329
+ workflow = [('train', 1)]
330
+ opencv_num_threads = 0
331
+ mp_start_method = 'fork'
332
+ auto_scale_lr = dict(enable=True, base_batch_size=64)
333
+ custom_imports = dict(
334
+ imports=[
335
+ 'mmselfsup.datasets.pipelines',
336
+ 'selfsup.core.hook.momentum_update_hook',
337
+ 'selfsup.datasets.pipelines.selfsup_pipelines',
338
+ 'selfsup.datasets.pipelines.rand_aug',
339
+ 'selfsup.datasets.single_view_coco',
340
+ 'selfsup.datasets.multi_view_coco',
341
+ 'selfsup.models.losses.contrastive_loss',
342
+ 'selfsup.models.dense_heads.fcos_head',
343
+ 'selfsup.models.dense_heads.retina_head',
344
+ 'selfsup.models.dense_heads.detr_head',
345
+ 'selfsup.models.dense_heads.deformable_detr_head',
346
+ 'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
347
+ 'selfsup.models.roi_heads.standard_roi_head',
348
+ 'selfsup.models.detectors.selfsup_detector',
349
+ 'selfsup.models.detectors.selfsup_fcos',
350
+ 'selfsup.models.detectors.selfsup_detr',
351
+ 'selfsup.models.detectors.selfsup_deformable_detr',
352
+ 'selfsup.models.detectors.selfsup_retinanet',
353
+ 'selfsup.models.detectors.selfsup_mask_rcnn',
354
+ 'selfsup.core.bbox.assigners.hungarian_assigner',
355
+ 'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
356
+ 'selfsup.core.bbox.match_costs.match_cost'
357
+ ],
358
+ allow_failed_imports=False)
359
+ classes = [
360
+ 'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'cluster_5',
361
+ 'cluster_6', 'cluster_7', 'cluster_8', 'cluster_9', 'cluster_10',
362
+ 'cluster_11', 'cluster_12', 'cluster_13', 'cluster_14', 'cluster_15',
363
+ 'cluster_16', 'cluster_17', 'cluster_18', 'cluster_19', 'cluster_20',
364
+ 'cluster_21', 'cluster_22', 'cluster_23', 'cluster_24', 'cluster_25',
365
+ 'cluster_26', 'cluster_27', 'cluster_28', 'cluster_29', 'cluster_30',
366
+ 'cluster_31', 'cluster_32', 'cluster_33', 'cluster_34', 'cluster_35',
367
+ 'cluster_36', 'cluster_37', 'cluster_38', 'cluster_39', 'cluster_40',
368
+ 'cluster_41', 'cluster_42', 'cluster_43', 'cluster_44', 'cluster_45',
369
+ 'cluster_46', 'cluster_47', 'cluster_48', 'cluster_49', 'cluster_50',
370
+ 'cluster_51', 'cluster_52', 'cluster_53', 'cluster_54', 'cluster_55',
371
+ 'cluster_56', 'cluster_57', 'cluster_58', 'cluster_59', 'cluster_60',
372
+ 'cluster_61', 'cluster_62', 'cluster_63', 'cluster_64', 'cluster_65',
373
+ 'cluster_66', 'cluster_67', 'cluster_68', 'cluster_69', 'cluster_70',
374
+ 'cluster_71', 'cluster_72', 'cluster_73', 'cluster_74', 'cluster_75',
375
+ 'cluster_76', 'cluster_77', 'cluster_78', 'cluster_79', 'cluster_80',
376
+ 'cluster_81', 'cluster_82', 'cluster_83', 'cluster_84', 'cluster_85',
377
+ 'cluster_86', 'cluster_87', 'cluster_88', 'cluster_89', 'cluster_90',
378
+ 'cluster_91', 'cluster_92', 'cluster_93', 'cluster_94', 'cluster_95',
379
+ 'cluster_96', 'cluster_97', 'cluster_98', 'cluster_99', 'cluster_100',
380
+ 'cluster_101', 'cluster_102', 'cluster_103', 'cluster_104', 'cluster_105',
381
+ 'cluster_106', 'cluster_107', 'cluster_108', 'cluster_109', 'cluster_110',
382
+ 'cluster_111', 'cluster_112', 'cluster_113', 'cluster_114', 'cluster_115',
383
+ 'cluster_116', 'cluster_117', 'cluster_118', 'cluster_119', 'cluster_120',
384
+ 'cluster_121', 'cluster_122', 'cluster_123', 'cluster_124', 'cluster_125',
385
+ 'cluster_126', 'cluster_127', 'cluster_128', 'cluster_129', 'cluster_130',
386
+ 'cluster_131', 'cluster_132', 'cluster_133', 'cluster_134', 'cluster_135',
387
+ 'cluster_136', 'cluster_137', 'cluster_138', 'cluster_139', 'cluster_140',
388
+ 'cluster_141', 'cluster_142', 'cluster_143', 'cluster_144', 'cluster_145',
389
+ 'cluster_146', 'cluster_147', 'cluster_148', 'cluster_149', 'cluster_150',
390
+ 'cluster_151', 'cluster_152', 'cluster_153', 'cluster_154', 'cluster_155',
391
+ 'cluster_156', 'cluster_157', 'cluster_158', 'cluster_159', 'cluster_160',
392
+ 'cluster_161', 'cluster_162', 'cluster_163', 'cluster_164', 'cluster_165',
393
+ 'cluster_166', 'cluster_167', 'cluster_168', 'cluster_169', 'cluster_170',
394
+ 'cluster_171', 'cluster_172', 'cluster_173', 'cluster_174', 'cluster_175',
395
+ 'cluster_176', 'cluster_177', 'cluster_178', 'cluster_179', 'cluster_180',
396
+ 'cluster_181', 'cluster_182', 'cluster_183', 'cluster_184', 'cluster_185',
397
+ 'cluster_186', 'cluster_187', 'cluster_188', 'cluster_189', 'cluster_190',
398
+ 'cluster_191', 'cluster_192', 'cluster_193', 'cluster_194', 'cluster_195',
399
+ 'cluster_196', 'cluster_197', 'cluster_198', 'cluster_199', 'cluster_200',
400
+ 'cluster_201', 'cluster_202', 'cluster_203', 'cluster_204', 'cluster_205',
401
+ 'cluster_206', 'cluster_207', 'cluster_208', 'cluster_209', 'cluster_210',
402
+ 'cluster_211', 'cluster_212', 'cluster_213', 'cluster_214', 'cluster_215',
403
+ 'cluster_216', 'cluster_217', 'cluster_218', 'cluster_219', 'cluster_220',
404
+ 'cluster_221', 'cluster_222', 'cluster_223', 'cluster_224', 'cluster_225',
405
+ 'cluster_226', 'cluster_227', 'cluster_228', 'cluster_229', 'cluster_230',
406
+ 'cluster_231', 'cluster_232', 'cluster_233', 'cluster_234', 'cluster_235',
407
+ 'cluster_236', 'cluster_237', 'cluster_238', 'cluster_239', 'cluster_240',
408
+ 'cluster_241', 'cluster_242', 'cluster_243', 'cluster_244', 'cluster_245',
409
+ 'cluster_246', 'cluster_247', 'cluster_248', 'cluster_249', 'cluster_250',
410
+ 'cluster_251', 'cluster_252', 'cluster_253', 'cluster_254', 'cluster_255',
411
+ 'cluster_256'
412
+ ]
413
+ optimizer = dict(
414
+ type='AdamW',
415
+ lr=0.0002,
416
+ weight_decay=0.0001,
417
+ paramwise_cfg=dict(
418
+ custom_keys=dict(backbone=dict(lr_mult=0, decay_mult=0))))
419
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
420
+ lr_config = dict(policy='step', step=[40])
421
+ runner = dict(type='EpochBasedRunner', max_epochs=50)
422
+ work_dir = 'work_dirs/selfsup_detr_cluster-ids-as-pseudo-labels'
423
+ auto_resume = False
424
+ gpu_ids = range(0, 32)
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/20230120_091015.log.json ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/final_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aca88dfee95a9cb04041b5b93a19169aaa3bb14ff12c237042bc981205d85ab
3
+ size 422177783
pretrain/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain/selfsup_mask-rcnn_swin-b_simmim.py ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='SelfSupDetector',
3
+ backbone=dict(
4
+ type='SelfSupMaskRCNN',
5
+ backbone=dict(
6
+ type='SwinTransformer',
7
+ embed_dims=128,
8
+ depths=[2, 2, 18, 2],
9
+ num_heads=[4, 8, 16, 32],
10
+ window_size=7,
11
+ mlp_ratio=4,
12
+ qkv_bias=True,
13
+ qk_scale=None,
14
+ drop_rate=0.0,
15
+ attn_drop_rate=0.0,
16
+ drop_path_rate=0.2,
17
+ patch_norm=True,
18
+ out_indices=(0, 1, 2, 3),
19
+ with_cp=False,
20
+ frozen_stages=4,
21
+ convert_weights=True,
22
+ init_cfg=dict(
23
+ type='Pretrained',
24
+ checkpoint=
25
+ 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth'
26
+ )),
27
+ neck=dict(
28
+ type='FPN',
29
+ in_channels=[128, 256, 512, 1024],
30
+ out_channels=256,
31
+ num_outs=5),
32
+ rpn_head=dict(
33
+ type='RPNHead',
34
+ in_channels=256,
35
+ feat_channels=256,
36
+ anchor_generator=dict(
37
+ type='AnchorGenerator',
38
+ scales=[8],
39
+ ratios=[0.5, 1.0, 2.0],
40
+ strides=[4, 8, 16, 32, 64]),
41
+ bbox_coder=dict(
42
+ type='DeltaXYWHBBoxCoder',
43
+ target_means=[0.0, 0.0, 0.0, 0.0],
44
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
45
+ loss_cls=dict(
46
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
47
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
48
+ roi_head=dict(
49
+ type='SelfSupStandardRoIHead',
50
+ bbox_roi_extractor=dict(
51
+ type='SingleRoIExtractor',
52
+ roi_layer=dict(
53
+ type='RoIAlign', output_size=7, sampling_ratio=0),
54
+ out_channels=256,
55
+ featmap_strides=[4, 8, 16, 32]),
56
+ bbox_head=dict(
57
+ type='SelfSupShared4Conv1FCBBoxHead',
58
+ in_channels=256,
59
+ num_classes=256,
60
+ roi_feat_size=7,
61
+ reg_class_agnostic=False,
62
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0),
63
+ loss_cls=dict(
64
+ type='ContrastiveLoss', loss_weight=1.0, temperature=0.5)),
65
+ mask_roi_extractor=None,
66
+ mask_head=None),
67
+ train_cfg=dict(
68
+ rpn=dict(
69
+ assigner=dict(
70
+ type='MaxIoUAssigner',
71
+ pos_iou_thr=0.7,
72
+ neg_iou_thr=0.3,
73
+ min_pos_iou=0.3,
74
+ match_low_quality=True,
75
+ ignore_iof_thr=-1),
76
+ sampler=dict(
77
+ type='RandomSampler',
78
+ num=4096,
79
+ pos_fraction=1.0,
80
+ neg_pos_ub=-1,
81
+ add_gt_as_proposals=False),
82
+ allowed_border=-1,
83
+ pos_weight=-1,
84
+ debug=False),
85
+ rpn_proposal=dict(
86
+ nms_pre=2000,
87
+ max_per_img=1000,
88
+ nms=dict(type='nms', iou_threshold=0.7),
89
+ min_bbox_size=0),
90
+ rcnn=dict(
91
+ assigner=dict(
92
+ type='MaxIoUAssigner',
93
+ pos_iou_thr=0.5,
94
+ neg_iou_thr=0.5,
95
+ min_pos_iou=0.5,
96
+ match_low_quality=True,
97
+ ignore_iof_thr=-1,
98
+ gt_max_assign_all=False),
99
+ sampler=dict(
100
+ type='RandomSampler',
101
+ num=4096,
102
+ pos_fraction=1,
103
+ neg_pos_ub=0,
104
+ add_gt_as_proposals=True),
105
+ mask_size=28,
106
+ pos_weight=-1,
107
+ debug=False)),
108
+ test_cfg=dict(
109
+ rpn=dict(
110
+ nms_pre=1000,
111
+ max_per_img=1000,
112
+ nms=dict(type='nms', iou_threshold=0.7),
113
+ min_bbox_size=0),
114
+ rcnn=dict(
115
+ score_thr=0.05,
116
+ nms=dict(type='nms', iou_threshold=0.5),
117
+ max_per_img=100,
118
+ mask_thr_binary=0.5)),
119
+ init_cfg=dict(
120
+ type='Pretrained',
121
+ checkpoint='pretrain/simmim_swin-b_mmselfsup-pretrain.pth')))
122
+ train_dataset_type = 'MultiViewCocoDataset'
123
+ test_dataset_type = 'CocoDataset'
124
+ data_root = 'data/coco/'
125
+ classes = ['selective_search']
126
+ img_norm_cfg = dict(
127
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
128
+ load_pipeline = [
129
+ dict(type='LoadImageFromFile'),
130
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
131
+ ]
132
+ train_pipeline1 = [
133
+ dict(
134
+ type='Resize',
135
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
136
+ (1333, 768), (1333, 800)],
137
+ multiscale_mode='value',
138
+ keep_ratio=True),
139
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
140
+ dict(type='Pad', size_divisor=32),
141
+ dict(type='RandFlip', flip_ratio=0.5),
142
+ dict(
143
+ type='OneOf',
144
+ transforms=[
145
+ dict(type='Identity'),
146
+ dict(type='AutoContrast'),
147
+ dict(type='RandEqualize'),
148
+ dict(type='RandSolarize'),
149
+ dict(type='RandColor'),
150
+ dict(type='RandContrast'),
151
+ dict(type='RandBrightness'),
152
+ dict(type='RandSharpness'),
153
+ dict(type='RandPosterize')
154
+ ]),
155
+ dict(
156
+ type='Normalize',
157
+ mean=[123.675, 116.28, 103.53],
158
+ std=[58.395, 57.12, 57.375],
159
+ to_rgb=True),
160
+ dict(type='DefaultFormatBundle'),
161
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
162
+ ]
163
+ train_pipeline2 = [
164
+ dict(
165
+ type='Resize',
166
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
167
+ (1333, 768), (1333, 800)],
168
+ multiscale_mode='value',
169
+ keep_ratio=True),
170
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
171
+ dict(type='Pad', size_divisor=32),
172
+ dict(type='RandFlip', flip_ratio=0.5),
173
+ dict(
174
+ type='OneOf',
175
+ transforms=[
176
+ dict(type='Identity'),
177
+ dict(type='AutoContrast'),
178
+ dict(type='RandEqualize'),
179
+ dict(type='RandSolarize'),
180
+ dict(type='RandColor'),
181
+ dict(type='RandContrast'),
182
+ dict(type='RandBrightness'),
183
+ dict(type='RandSharpness'),
184
+ dict(type='RandPosterize')
185
+ ]),
186
+ dict(
187
+ type='Normalize',
188
+ mean=[123.675, 116.28, 103.53],
189
+ std=[58.395, 57.12, 57.375],
190
+ to_rgb=True),
191
+ dict(type='DefaultFormatBundle'),
192
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
193
+ ]
194
+ test_pipeline = [
195
+ dict(type='LoadImageFromFile'),
196
+ dict(
197
+ type='MultiScaleFlipAug',
198
+ img_scale=(1333, 800),
199
+ flip=False,
200
+ transforms=[
201
+ dict(type='Resize', keep_ratio=True),
202
+ dict(type='RandomFlip'),
203
+ dict(
204
+ type='Normalize',
205
+ mean=[123.675, 116.28, 103.53],
206
+ std=[58.395, 57.12, 57.375],
207
+ to_rgb=True),
208
+ dict(type='Pad', size_divisor=32),
209
+ dict(type='ImageToTensor', keys=['img']),
210
+ dict(type='Collect', keys=['img'])
211
+ ])
212
+ ]
213
+ data = dict(
214
+ samples_per_gpu=4,
215
+ workers_per_gpu=2,
216
+ train=dict(
217
+ type='MultiViewCocoDataset',
218
+ dataset=dict(
219
+ type='CocoDataset',
220
+ classes=['selective_search'],
221
+ ann_file=
222
+ 'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
223
+ img_prefix='data/coco/train2017/',
224
+ pipeline=[
225
+ dict(type='LoadImageFromFile'),
226
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
227
+ ]),
228
+ num_views=2,
229
+ pipelines=[[{
230
+ 'type':
231
+ 'Resize',
232
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
233
+ (1333, 768), (1333, 800)],
234
+ 'multiscale_mode':
235
+ 'value',
236
+ 'keep_ratio':
237
+ True
238
+ }, {
239
+ 'type': 'FilterAnnotations',
240
+ 'min_gt_bbox_wh': (0.01, 0.01)
241
+ }, {
242
+ 'type': 'Pad',
243
+ 'size_divisor': 32
244
+ }, {
245
+ 'type': 'RandFlip',
246
+ 'flip_ratio': 0.5
247
+ }, {
248
+ 'type':
249
+ 'OneOf',
250
+ 'transforms': [{
251
+ 'type': 'Identity'
252
+ }, {
253
+ 'type': 'AutoContrast'
254
+ }, {
255
+ 'type': 'RandEqualize'
256
+ }, {
257
+ 'type': 'RandSolarize'
258
+ }, {
259
+ 'type': 'RandColor'
260
+ }, {
261
+ 'type': 'RandContrast'
262
+ }, {
263
+ 'type': 'RandBrightness'
264
+ }, {
265
+ 'type': 'RandSharpness'
266
+ }, {
267
+ 'type': 'RandPosterize'
268
+ }]
269
+ }, {
270
+ 'type': 'Normalize',
271
+ 'mean': [123.675, 116.28, 103.53],
272
+ 'std': [58.395, 57.12, 57.375],
273
+ 'to_rgb': True
274
+ }, {
275
+ 'type': 'DefaultFormatBundle'
276
+ }, {
277
+ 'type': 'Collect',
278
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
279
+ }],
280
+ [{
281
+ 'type':
282
+ 'Resize',
283
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704),
284
+ (1333, 736), (1333, 768), (1333, 800)],
285
+ 'multiscale_mode':
286
+ 'value',
287
+ 'keep_ratio':
288
+ True
289
+ }, {
290
+ 'type': 'FilterAnnotations',
291
+ 'min_gt_bbox_wh': (0.01, 0.01)
292
+ }, {
293
+ 'type': 'Pad',
294
+ 'size_divisor': 32
295
+ }, {
296
+ 'type': 'RandFlip',
297
+ 'flip_ratio': 0.5
298
+ }, {
299
+ 'type':
300
+ 'OneOf',
301
+ 'transforms': [{
302
+ 'type': 'Identity'
303
+ }, {
304
+ 'type': 'AutoContrast'
305
+ }, {
306
+ 'type': 'RandEqualize'
307
+ }, {
308
+ 'type': 'RandSolarize'
309
+ }, {
310
+ 'type': 'RandColor'
311
+ }, {
312
+ 'type': 'RandContrast'
313
+ }, {
314
+ 'type': 'RandBrightness'
315
+ }, {
316
+ 'type': 'RandSharpness'
317
+ }, {
318
+ 'type': 'RandPosterize'
319
+ }]
320
+ }, {
321
+ 'type': 'Normalize',
322
+ 'mean': [123.675, 116.28, 103.53],
323
+ 'std': [58.395, 57.12, 57.375],
324
+ 'to_rgb': True
325
+ }, {
326
+ 'type': 'DefaultFormatBundle'
327
+ }, {
328
+ 'type': 'Collect',
329
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
330
+ }]]),
331
+ val=dict(
332
+ type='CocoDataset',
333
+ classes=['selective_search'],
334
+ ann_file='data/coco/annotations/instances_val2017.json',
335
+ img_prefix='data/coco/val2017/',
336
+ pipeline=[
337
+ dict(type='LoadImageFromFile'),
338
+ dict(
339
+ type='MultiScaleFlipAug',
340
+ img_scale=(1333, 800),
341
+ flip=False,
342
+ transforms=[
343
+ dict(type='Resize', keep_ratio=True),
344
+ dict(type='RandomFlip'),
345
+ dict(
346
+ type='Normalize',
347
+ mean=[123.675, 116.28, 103.53],
348
+ std=[58.395, 57.12, 57.375],
349
+ to_rgb=True),
350
+ dict(type='Pad', size_divisor=32),
351
+ dict(type='ImageToTensor', keys=['img']),
352
+ dict(type='Collect', keys=['img'])
353
+ ])
354
+ ]),
355
+ test=dict(
356
+ type='CocoDataset',
357
+ classes=['selective_search'],
358
+ ann_file='data/coco/annotations/instances_val2017.json',
359
+ img_prefix='data/coco/val2017/',
360
+ pipeline=[
361
+ dict(type='LoadImageFromFile'),
362
+ dict(
363
+ type='MultiScaleFlipAug',
364
+ img_scale=(1333, 800),
365
+ flip=False,
366
+ transforms=[
367
+ dict(type='Resize', keep_ratio=True),
368
+ dict(type='RandomFlip'),
369
+ dict(
370
+ type='Normalize',
371
+ mean=[123.675, 116.28, 103.53],
372
+ std=[58.395, 57.12, 57.375],
373
+ to_rgb=True),
374
+ dict(type='Pad', size_divisor=32),
375
+ dict(type='ImageToTensor', keys=['img']),
376
+ dict(type='Collect', keys=['img'])
377
+ ])
378
+ ]))
379
+ evaluation = dict(interval=65535, gpu_collect=True, metric='bbox')
380
+ optimizer = dict(
381
+ type='AdamW',
382
+ lr=6e-05,
383
+ betas=(0.9, 0.999),
384
+ weight_decay=0.05,
385
+ paramwise_cfg=dict(
386
+ custom_keys=dict(
387
+ absolute_pos_embed=dict(decay_mult=0.0),
388
+ relative_position_bias_table=dict(decay_mult=0.0),
389
+ norm=dict(decay_mult=0.0))))
390
+ optimizer_config = dict(grad_clip=None)
391
+ lr_config = dict(
392
+ policy='step',
393
+ warmup='linear',
394
+ warmup_iters=1000,
395
+ warmup_ratio=0.001,
396
+ step=[8, 11])
397
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
398
+ checkpoint_config = dict(interval=1)
399
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
400
+ custom_hooks = [
401
+ dict(type='MomentumUpdateHook'),
402
+ dict(
403
+ type='MMDetWandbHook',
404
+ init_kwargs=dict(project='I2B', group='pretrain'),
405
+ interval=50,
406
+ num_eval_images=0,
407
+ log_checkpoint=False)
408
+ ]
409
+ dist_params = dict(backend='nccl')
410
+ log_level = 'INFO'
411
+ load_from = None
412
+ resume_from = None
413
+ workflow = [('train', 1)]
414
+ opencv_num_threads = 0
415
+ mp_start_method = 'fork'
416
+ auto_scale_lr = dict(enable=True, base_batch_size=32)
417
+ custom_imports = dict(
418
+ imports=[
419
+ 'mmselfsup.datasets.pipelines',
420
+ 'selfsup.core.hook.momentum_update_hook',
421
+ 'selfsup.datasets.pipelines.selfsup_pipelines',
422
+ 'selfsup.datasets.pipelines.rand_aug',
423
+ 'selfsup.datasets.single_view_coco',
424
+ 'selfsup.datasets.multi_view_coco',
425
+ 'selfsup.models.losses.contrastive_loss',
426
+ 'selfsup.models.dense_heads.fcos_head',
427
+ 'selfsup.models.dense_heads.retina_head',
428
+ 'selfsup.models.dense_heads.detr_head',
429
+ 'selfsup.models.dense_heads.deformable_detr_head',
430
+ 'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
431
+ 'selfsup.models.roi_heads.standard_roi_head',
432
+ 'selfsup.models.detectors.selfsup_detector',
433
+ 'selfsup.models.detectors.selfsup_fcos',
434
+ 'selfsup.models.detectors.selfsup_detr',
435
+ 'selfsup.models.detectors.selfsup_deformable_detr',
436
+ 'selfsup.models.detectors.selfsup_retinanet',
437
+ 'selfsup.models.detectors.selfsup_mask_rcnn',
438
+ 'selfsup.core.bbox.assigners.hungarian_assigner',
439
+ 'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
440
+ 'selfsup.core.bbox.match_costs.match_cost'
441
+ ],
442
+ allow_failed_imports=False)
443
+ pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth'
444
+ find_unused_parameters = True
445
+ work_dir = 'work_dirs/selfsup_mask-rcnn_swin-b_lsj-3x-coco_simmim-pretrain'
446
+ auto_resume = False
447
+ gpu_ids = range(0, 8)
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/20220901_231349.log.json ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/final_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7905e6dc0b5158a6f896575d62a3f3ed0c1dd9d7ff4bb8e923339c2b46b587c
3
+ size 170913485
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain/mask_rcnn.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='SelfSupDetector',
3
+ backbone=dict(
4
+ type='SelfSupMaskRCNN',
5
+ backbone=dict(
6
+ type='ResNet',
7
+ depth=50,
8
+ num_stages=4,
9
+ out_indices=(0, 1, 2, 3),
10
+ frozen_stages=4,
11
+ norm_cfg=dict(type='BN', requires_grad=False),
12
+ norm_eval=True,
13
+ style='pytorch',
14
+ init_cfg=dict(
15
+ type='Pretrained',
16
+ checkpoint='pretrain/mocov2_resnet50_256bs-coslr-800e_in1k.pth'
17
+ )),
18
+ neck=dict(
19
+ type='FPN',
20
+ in_channels=[256, 512, 1024, 2048],
21
+ out_channels=256,
22
+ num_outs=5),
23
+ rpn_head=dict(
24
+ type='RPNHead',
25
+ in_channels=256,
26
+ feat_channels=256,
27
+ anchor_generator=dict(
28
+ type='AnchorGenerator',
29
+ scales=[8],
30
+ ratios=[0.5, 1.0, 2.0],
31
+ strides=[4, 8, 16, 32, 64]),
32
+ bbox_coder=dict(
33
+ type='DeltaXYWHBBoxCoder',
34
+ target_means=[0.0, 0.0, 0.0, 0.0],
35
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
36
+ loss_cls=dict(
37
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
38
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
39
+ roi_head=dict(
40
+ type='SelfSupStandardRoIHead',
41
+ bbox_roi_extractor=dict(
42
+ type='SingleRoIExtractor',
43
+ roi_layer=dict(
44
+ type='RoIAlign', output_size=7, sampling_ratio=0),
45
+ out_channels=256,
46
+ featmap_strides=[4, 8, 16, 32]),
47
+ bbox_head=dict(
48
+ type='SelfSupShared4Conv1FCBBoxHead',
49
+ in_channels=256,
50
+ num_classes=256,
51
+ roi_feat_size=7,
52
+ loss_cls=dict(
53
+ type='ContrastiveLoss', loss_weight=1.0, temperature=0.2)),
54
+ mask_roi_extractor=None,
55
+ mask_head=None),
56
+ train_cfg=dict(
57
+ rpn=dict(
58
+ assigner=dict(
59
+ type='MaxIoUAssigner',
60
+ pos_iou_thr=0.7,
61
+ neg_iou_thr=0.3,
62
+ min_pos_iou=0.3,
63
+ match_low_quality=True,
64
+ ignore_iof_thr=-1),
65
+ sampler=dict(
66
+ type='RandomSampler',
67
+ num=4096,
68
+ pos_fraction=1.0,
69
+ neg_pos_ub=-1,
70
+ add_gt_as_proposals=False),
71
+ allowed_border=-1,
72
+ pos_weight=-1,
73
+ debug=False),
74
+ rpn_proposal=dict(
75
+ nms_pre=2000,
76
+ max_per_img=1000,
77
+ nms=dict(type='nms', iou_threshold=0.7),
78
+ min_bbox_size=0),
79
+ rcnn=dict(
80
+ assigner=dict(
81
+ type='MaxIoUAssigner',
82
+ pos_iou_thr=0.5,
83
+ neg_iou_thr=0.5,
84
+ min_pos_iou=0.5,
85
+ match_low_quality=True,
86
+ ignore_iof_thr=-1,
87
+ gt_max_assign_all=False),
88
+ sampler=dict(
89
+ type='RandomSampler',
90
+ num=4096,
91
+ pos_fraction=1,
92
+ neg_pos_ub=0,
93
+ add_gt_as_proposals=True),
94
+ mask_size=28,
95
+ pos_weight=-1,
96
+ debug=False)),
97
+ test_cfg=dict(
98
+ rpn=dict(
99
+ nms_pre=1000,
100
+ max_per_img=1000,
101
+ nms=dict(type='nms', iou_threshold=0.7),
102
+ min_bbox_size=0),
103
+ rcnn=dict(
104
+ score_thr=0.05,
105
+ nms=dict(type='nms', iou_threshold=0.5),
106
+ max_per_img=100,
107
+ mask_thr_binary=0.5))))
108
+ train_dataset_type = 'MultiViewCocoDataset'
109
+ test_dataset_type = 'CocoDataset'
110
+ data_root = 'data/coco/'
111
+ classes = ['selective_search']
112
+ img_norm_cfg = dict(
113
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
114
+ load_pipeline = [
115
+ dict(type='LoadImageFromFile'),
116
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
117
+ ]
118
+ train_pipeline1 = [
119
+ dict(
120
+ type='Resize',
121
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
122
+ (1333, 768), (1333, 800)],
123
+ multiscale_mode='value',
124
+ keep_ratio=True),
125
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
126
+ dict(type='Pad', size_divisor=32),
127
+ dict(type='RandFlip', flip_ratio=0.5),
128
+ dict(
129
+ type='OneOf',
130
+ transforms=[
131
+ dict(type='Identity'),
132
+ dict(type='AutoContrast'),
133
+ dict(type='RandEqualize'),
134
+ dict(type='RandSolarize'),
135
+ dict(type='RandColor'),
136
+ dict(type='RandContrast'),
137
+ dict(type='RandBrightness'),
138
+ dict(type='RandSharpness'),
139
+ dict(type='RandPosterize')
140
+ ]),
141
+ dict(
142
+ type='Normalize',
143
+ mean=[123.675, 116.28, 103.53],
144
+ std=[58.395, 57.12, 57.375],
145
+ to_rgb=True),
146
+ dict(type='DefaultFormatBundle'),
147
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
148
+ ]
149
+ train_pipeline2 = [
150
+ dict(
151
+ type='Resize',
152
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
153
+ (1333, 768), (1333, 800)],
154
+ multiscale_mode='value',
155
+ keep_ratio=True),
156
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
157
+ dict(type='Pad', size_divisor=32),
158
+ dict(type='RandFlip', flip_ratio=0.5),
159
+ dict(
160
+ type='OneOf',
161
+ transforms=[
162
+ dict(type='Identity'),
163
+ dict(type='AutoContrast'),
164
+ dict(type='RandEqualize'),
165
+ dict(type='RandSolarize'),
166
+ dict(type='RandColor'),
167
+ dict(type='RandContrast'),
168
+ dict(type='RandBrightness'),
169
+ dict(type='RandSharpness'),
170
+ dict(type='RandPosterize')
171
+ ]),
172
+ dict(
173
+ type='Normalize',
174
+ mean=[123.675, 116.28, 103.53],
175
+ std=[58.395, 57.12, 57.375],
176
+ to_rgb=True),
177
+ dict(type='DefaultFormatBundle'),
178
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
179
+ ]
180
+ test_pipeline = [
181
+ dict(type='LoadImageFromFile'),
182
+ dict(
183
+ type='MultiScaleFlipAug',
184
+ img_scale=(1333, 800),
185
+ flip=False,
186
+ transforms=[
187
+ dict(type='Resize', keep_ratio=True),
188
+ dict(type='RandomFlip'),
189
+ dict(
190
+ type='Normalize',
191
+ mean=[123.675, 116.28, 103.53],
192
+ std=[58.395, 57.12, 57.375],
193
+ to_rgb=True),
194
+ dict(type='Pad', size_divisor=32),
195
+ dict(type='ImageToTensor', keys=['img']),
196
+ dict(type='Collect', keys=['img'])
197
+ ])
198
+ ]
199
+ data = dict(
200
+ samples_per_gpu=2,
201
+ workers_per_gpu=2,
202
+ train=dict(
203
+ type='MultiViewCocoDataset',
204
+ dataset=dict(
205
+ type='CocoDataset',
206
+ classes=['selective_search'],
207
+ ann_file=
208
+ 'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
209
+ img_prefix='data/coco/train2017/',
210
+ pipeline=[
211
+ dict(type='LoadImageFromFile'),
212
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
213
+ ]),
214
+ num_views=2,
215
+ pipelines=[[{
216
+ 'type':
217
+ 'Resize',
218
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
219
+ (1333, 768), (1333, 800)],
220
+ 'multiscale_mode':
221
+ 'value',
222
+ 'keep_ratio':
223
+ True
224
+ }, {
225
+ 'type': 'FilterAnnotations',
226
+ 'min_gt_bbox_wh': (0.01, 0.01)
227
+ }, {
228
+ 'type': 'Pad',
229
+ 'size_divisor': 32
230
+ }, {
231
+ 'type': 'RandFlip',
232
+ 'flip_ratio': 0.5
233
+ }, {
234
+ 'type':
235
+ 'OneOf',
236
+ 'transforms': [{
237
+ 'type': 'Identity'
238
+ }, {
239
+ 'type': 'AutoContrast'
240
+ }, {
241
+ 'type': 'RandEqualize'
242
+ }, {
243
+ 'type': 'RandSolarize'
244
+ }, {
245
+ 'type': 'RandColor'
246
+ }, {
247
+ 'type': 'RandContrast'
248
+ }, {
249
+ 'type': 'RandBrightness'
250
+ }, {
251
+ 'type': 'RandSharpness'
252
+ }, {
253
+ 'type': 'RandPosterize'
254
+ }]
255
+ }, {
256
+ 'type': 'Normalize',
257
+ 'mean': [123.675, 116.28, 103.53],
258
+ 'std': [58.395, 57.12, 57.375],
259
+ 'to_rgb': True
260
+ }, {
261
+ 'type': 'DefaultFormatBundle'
262
+ }, {
263
+ 'type': 'Collect',
264
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
265
+ }],
266
+ [{
267
+ 'type':
268
+ 'Resize',
269
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704),
270
+ (1333, 736), (1333, 768), (1333, 800)],
271
+ 'multiscale_mode':
272
+ 'value',
273
+ 'keep_ratio':
274
+ True
275
+ }, {
276
+ 'type': 'FilterAnnotations',
277
+ 'min_gt_bbox_wh': (0.01, 0.01)
278
+ }, {
279
+ 'type': 'Pad',
280
+ 'size_divisor': 32
281
+ }, {
282
+ 'type': 'RandFlip',
283
+ 'flip_ratio': 0.5
284
+ }, {
285
+ 'type':
286
+ 'OneOf',
287
+ 'transforms': [{
288
+ 'type': 'Identity'
289
+ }, {
290
+ 'type': 'AutoContrast'
291
+ }, {
292
+ 'type': 'RandEqualize'
293
+ }, {
294
+ 'type': 'RandSolarize'
295
+ }, {
296
+ 'type': 'RandColor'
297
+ }, {
298
+ 'type': 'RandContrast'
299
+ }, {
300
+ 'type': 'RandBrightness'
301
+ }, {
302
+ 'type': 'RandSharpness'
303
+ }, {
304
+ 'type': 'RandPosterize'
305
+ }]
306
+ }, {
307
+ 'type': 'Normalize',
308
+ 'mean': [123.675, 116.28, 103.53],
309
+ 'std': [58.395, 57.12, 57.375],
310
+ 'to_rgb': True
311
+ }, {
312
+ 'type': 'DefaultFormatBundle'
313
+ }, {
314
+ 'type': 'Collect',
315
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
316
+ }]]),
317
+ val=dict(
318
+ type='CocoDataset',
319
+ classes=['selective_search'],
320
+ ann_file='data/coco/annotations/instances_val2017.json',
321
+ img_prefix='data/coco/val2017/',
322
+ pipeline=[
323
+ dict(type='LoadImageFromFile'),
324
+ dict(
325
+ type='MultiScaleFlipAug',
326
+ img_scale=(1333, 800),
327
+ flip=False,
328
+ transforms=[
329
+ dict(type='Resize', keep_ratio=True),
330
+ dict(type='RandomFlip'),
331
+ dict(
332
+ type='Normalize',
333
+ mean=[123.675, 116.28, 103.53],
334
+ std=[58.395, 57.12, 57.375],
335
+ to_rgb=True),
336
+ dict(type='Pad', size_divisor=32),
337
+ dict(type='ImageToTensor', keys=['img']),
338
+ dict(type='Collect', keys=['img'])
339
+ ])
340
+ ]),
341
+ test=dict(
342
+ type='CocoDataset',
343
+ classes=['selective_search'],
344
+ ann_file='data/coco/annotations/instances_val2017.json',
345
+ img_prefix='data/coco/val2017/',
346
+ pipeline=[
347
+ dict(type='LoadImageFromFile'),
348
+ dict(
349
+ type='MultiScaleFlipAug',
350
+ img_scale=(1333, 800),
351
+ flip=False,
352
+ transforms=[
353
+ dict(type='Resize', keep_ratio=True),
354
+ dict(type='RandomFlip'),
355
+ dict(
356
+ type='Normalize',
357
+ mean=[123.675, 116.28, 103.53],
358
+ std=[58.395, 57.12, 57.375],
359
+ to_rgb=True),
360
+ dict(type='Pad', size_divisor=32),
361
+ dict(type='ImageToTensor', keys=['img']),
362
+ dict(type='Collect', keys=['img'])
363
+ ])
364
+ ]))
365
+ evaluation = dict(metric='bbox', interval=65535)
366
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
367
+ optimizer_config = dict(grad_clip=None)
368
+ lr_config = dict(
369
+ policy='step',
370
+ warmup='linear',
371
+ warmup_iters=500,
372
+ warmup_ratio=0.001,
373
+ step=[8, 11])
374
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
375
+ checkpoint_config = dict(interval=1)
376
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
377
+ custom_hooks = [
378
+ dict(type='MomentumUpdateHook'),
379
+ dict(
380
+ type='MMDetWandbHook',
381
+ init_kwargs=dict(project='mmdet_pretrain', group='pretrain'),
382
+ interval=50,
383
+ num_eval_images=0,
384
+ log_checkpoint=False)
385
+ ]
386
+ dist_params = dict(backend='nccl')
387
+ log_level = 'INFO'
388
+ load_from = None
389
+ resume_from = None
390
+ workflow = [('train', 1)]
391
+ opencv_num_threads = 0
392
+ mp_start_method = 'fork'
393
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
394
+ custom_imports = dict(
395
+ imports=[
396
+ 'mmselfsup.core', 'mmselfsup.datasets.pipelines',
397
+ 'selfsup.datasets.pipelines.selfsup_pipelines',
398
+ 'selfsup.datasets.pipelines.rand_aug',
399
+ 'selfsup.datasets.single_view_coco',
400
+ 'selfsup.datasets.multi_view_coco',
401
+ 'selfsup.models.losses.contrastive_loss',
402
+ 'selfsup.models.dense_heads.fcos_head',
403
+ 'selfsup.models.dense_heads.retina_head',
404
+ 'selfsup.models.dense_heads.detr_head',
405
+ 'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
406
+ 'selfsup.models.roi_heads.standard_roi_head',
407
+ 'selfsup.models.detectors.selfsup_detector',
408
+ 'selfsup.models.detectors.selfsup_fcos',
409
+ 'selfsup.models.detectors.selfsup_detr',
410
+ 'selfsup.models.detectors.selfsup_retinanet',
411
+ 'selfsup.models.detectors.selfsup_mask_rcnn',
412
+ 'selfsup.core.bbox.match_costs.match_cost'
413
+ ],
414
+ allow_failed_imports=False)
415
+ work_dir = 'work_dirs/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_mocov2-pretrain'
416
+ auto_resume = False
417
+ gpu_ids = range(0, 8)
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/20220901_231408.log.json ADDED
The diff for this file is too large to render. See raw diff
 
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/final_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4328af18c59d656037f81d1fd10de0878bc61849f1432e6adfe51edb37bf1bb
3
+ size 170913485
pretrain/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain/mask_rcnn.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='SelfSupDetector',
3
+ backbone=dict(
4
+ type='SelfSupMaskRCNN',
5
+ backbone=dict(
6
+ type='ResNet',
7
+ depth=50,
8
+ num_stages=4,
9
+ out_indices=(0, 1, 2, 3),
10
+ frozen_stages=4,
11
+ norm_cfg=dict(type='BN', requires_grad=False),
12
+ norm_eval=True,
13
+ style='pytorch',
14
+ init_cfg=dict(
15
+ type='Pretrained',
16
+ checkpoint='pretrain/pixpro_resnet50_8xb128-coslr-400e_in1k.pth'
17
+ )),
18
+ neck=dict(
19
+ type='FPN',
20
+ in_channels=[256, 512, 1024, 2048],
21
+ out_channels=256,
22
+ num_outs=5),
23
+ rpn_head=dict(
24
+ type='RPNHead',
25
+ in_channels=256,
26
+ feat_channels=256,
27
+ anchor_generator=dict(
28
+ type='AnchorGenerator',
29
+ scales=[8],
30
+ ratios=[0.5, 1.0, 2.0],
31
+ strides=[4, 8, 16, 32, 64]),
32
+ bbox_coder=dict(
33
+ type='DeltaXYWHBBoxCoder',
34
+ target_means=[0.0, 0.0, 0.0, 0.0],
35
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
36
+ loss_cls=dict(
37
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
38
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
39
+ roi_head=dict(
40
+ type='SelfSupStandardRoIHead',
41
+ bbox_roi_extractor=dict(
42
+ type='SingleRoIExtractor',
43
+ roi_layer=dict(
44
+ type='RoIAlign', output_size=7, sampling_ratio=0),
45
+ out_channels=256,
46
+ featmap_strides=[4, 8, 16, 32]),
47
+ bbox_head=dict(
48
+ type='SelfSupShared4Conv1FCBBoxHead',
49
+ in_channels=256,
50
+ num_classes=256,
51
+ roi_feat_size=7,
52
+ loss_cls=dict(
53
+ type='ContrastiveLoss', loss_weight=1.0, temperature=0.2)),
54
+ mask_roi_extractor=None,
55
+ mask_head=None),
56
+ train_cfg=dict(
57
+ rpn=dict(
58
+ assigner=dict(
59
+ type='MaxIoUAssigner',
60
+ pos_iou_thr=0.7,
61
+ neg_iou_thr=0.3,
62
+ min_pos_iou=0.3,
63
+ match_low_quality=True,
64
+ ignore_iof_thr=-1),
65
+ sampler=dict(
66
+ type='RandomSampler',
67
+ num=4096,
68
+ pos_fraction=1.0,
69
+ neg_pos_ub=-1,
70
+ add_gt_as_proposals=False),
71
+ allowed_border=-1,
72
+ pos_weight=-1,
73
+ debug=False),
74
+ rpn_proposal=dict(
75
+ nms_pre=2000,
76
+ max_per_img=1000,
77
+ nms=dict(type='nms', iou_threshold=0.7),
78
+ min_bbox_size=0),
79
+ rcnn=dict(
80
+ assigner=dict(
81
+ type='MaxIoUAssigner',
82
+ pos_iou_thr=0.5,
83
+ neg_iou_thr=0.5,
84
+ min_pos_iou=0.5,
85
+ match_low_quality=True,
86
+ ignore_iof_thr=-1,
87
+ gt_max_assign_all=False),
88
+ sampler=dict(
89
+ type='RandomSampler',
90
+ num=4096,
91
+ pos_fraction=1,
92
+ neg_pos_ub=0,
93
+ add_gt_as_proposals=True),
94
+ mask_size=28,
95
+ pos_weight=-1,
96
+ debug=False)),
97
+ test_cfg=dict(
98
+ rpn=dict(
99
+ nms_pre=1000,
100
+ max_per_img=1000,
101
+ nms=dict(type='nms', iou_threshold=0.7),
102
+ min_bbox_size=0),
103
+ rcnn=dict(
104
+ score_thr=0.05,
105
+ nms=dict(type='nms', iou_threshold=0.5),
106
+ max_per_img=100,
107
+ mask_thr_binary=0.5))))
108
+ train_dataset_type = 'MultiViewCocoDataset'
109
+ test_dataset_type = 'CocoDataset'
110
+ data_root = 'data/coco/'
111
+ classes = ['selective_search']
112
+ img_norm_cfg = dict(
113
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
114
+ load_pipeline = [
115
+ dict(type='LoadImageFromFile'),
116
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
117
+ ]
118
+ train_pipeline1 = [
119
+ dict(
120
+ type='Resize',
121
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
122
+ (1333, 768), (1333, 800)],
123
+ multiscale_mode='value',
124
+ keep_ratio=True),
125
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
126
+ dict(type='Pad', size_divisor=32),
127
+ dict(type='RandFlip', flip_ratio=0.5),
128
+ dict(
129
+ type='OneOf',
130
+ transforms=[
131
+ dict(type='Identity'),
132
+ dict(type='AutoContrast'),
133
+ dict(type='RandEqualize'),
134
+ dict(type='RandSolarize'),
135
+ dict(type='RandColor'),
136
+ dict(type='RandContrast'),
137
+ dict(type='RandBrightness'),
138
+ dict(type='RandSharpness'),
139
+ dict(type='RandPosterize')
140
+ ]),
141
+ dict(
142
+ type='Normalize',
143
+ mean=[123.675, 116.28, 103.53],
144
+ std=[58.395, 57.12, 57.375],
145
+ to_rgb=True),
146
+ dict(type='DefaultFormatBundle'),
147
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
148
+ ]
149
+ train_pipeline2 = [
150
+ dict(
151
+ type='Resize',
152
+ img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
153
+ (1333, 768), (1333, 800)],
154
+ multiscale_mode='value',
155
+ keep_ratio=True),
156
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
157
+ dict(type='Pad', size_divisor=32),
158
+ dict(type='RandFlip', flip_ratio=0.5),
159
+ dict(
160
+ type='OneOf',
161
+ transforms=[
162
+ dict(type='Identity'),
163
+ dict(type='AutoContrast'),
164
+ dict(type='RandEqualize'),
165
+ dict(type='RandSolarize'),
166
+ dict(type='RandColor'),
167
+ dict(type='RandContrast'),
168
+ dict(type='RandBrightness'),
169
+ dict(type='RandSharpness'),
170
+ dict(type='RandPosterize')
171
+ ]),
172
+ dict(
173
+ type='Normalize',
174
+ mean=[123.675, 116.28, 103.53],
175
+ std=[58.395, 57.12, 57.375],
176
+ to_rgb=True),
177
+ dict(type='DefaultFormatBundle'),
178
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
179
+ ]
180
+ test_pipeline = [
181
+ dict(type='LoadImageFromFile'),
182
+ dict(
183
+ type='MultiScaleFlipAug',
184
+ img_scale=(1333, 800),
185
+ flip=False,
186
+ transforms=[
187
+ dict(type='Resize', keep_ratio=True),
188
+ dict(type='RandomFlip'),
189
+ dict(
190
+ type='Normalize',
191
+ mean=[123.675, 116.28, 103.53],
192
+ std=[58.395, 57.12, 57.375],
193
+ to_rgb=True),
194
+ dict(type='Pad', size_divisor=32),
195
+ dict(type='ImageToTensor', keys=['img']),
196
+ dict(type='Collect', keys=['img'])
197
+ ])
198
+ ]
199
+ data = dict(
200
+ samples_per_gpu=2,
201
+ workers_per_gpu=2,
202
+ train=dict(
203
+ type='MultiViewCocoDataset',
204
+ dataset=dict(
205
+ type='CocoDataset',
206
+ classes=['selective_search'],
207
+ ann_file=
208
+ 'data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
209
+ img_prefix='data/coco/train2017/',
210
+ pipeline=[
211
+ dict(type='LoadImageFromFile'),
212
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=False)
213
+ ]),
214
+ num_views=2,
215
+ pipelines=[[{
216
+ 'type':
217
+ 'Resize',
218
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704), (1333, 736),
219
+ (1333, 768), (1333, 800)],
220
+ 'multiscale_mode':
221
+ 'value',
222
+ 'keep_ratio':
223
+ True
224
+ }, {
225
+ 'type': 'FilterAnnotations',
226
+ 'min_gt_bbox_wh': (0.01, 0.01)
227
+ }, {
228
+ 'type': 'Pad',
229
+ 'size_divisor': 32
230
+ }, {
231
+ 'type': 'RandFlip',
232
+ 'flip_ratio': 0.5
233
+ }, {
234
+ 'type':
235
+ 'OneOf',
236
+ 'transforms': [{
237
+ 'type': 'Identity'
238
+ }, {
239
+ 'type': 'AutoContrast'
240
+ }, {
241
+ 'type': 'RandEqualize'
242
+ }, {
243
+ 'type': 'RandSolarize'
244
+ }, {
245
+ 'type': 'RandColor'
246
+ }, {
247
+ 'type': 'RandContrast'
248
+ }, {
249
+ 'type': 'RandBrightness'
250
+ }, {
251
+ 'type': 'RandSharpness'
252
+ }, {
253
+ 'type': 'RandPosterize'
254
+ }]
255
+ }, {
256
+ 'type': 'Normalize',
257
+ 'mean': [123.675, 116.28, 103.53],
258
+ 'std': [58.395, 57.12, 57.375],
259
+ 'to_rgb': True
260
+ }, {
261
+ 'type': 'DefaultFormatBundle'
262
+ }, {
263
+ 'type': 'Collect',
264
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
265
+ }],
266
+ [{
267
+ 'type':
268
+ 'Resize',
269
+ 'img_scale': [(1333, 640), (1333, 672), (1333, 704),
270
+ (1333, 736), (1333, 768), (1333, 800)],
271
+ 'multiscale_mode':
272
+ 'value',
273
+ 'keep_ratio':
274
+ True
275
+ }, {
276
+ 'type': 'FilterAnnotations',
277
+ 'min_gt_bbox_wh': (0.01, 0.01)
278
+ }, {
279
+ 'type': 'Pad',
280
+ 'size_divisor': 32
281
+ }, {
282
+ 'type': 'RandFlip',
283
+ 'flip_ratio': 0.5
284
+ }, {
285
+ 'type':
286
+ 'OneOf',
287
+ 'transforms': [{
288
+ 'type': 'Identity'
289
+ }, {
290
+ 'type': 'AutoContrast'
291
+ }, {
292
+ 'type': 'RandEqualize'
293
+ }, {
294
+ 'type': 'RandSolarize'
295
+ }, {
296
+ 'type': 'RandColor'
297
+ }, {
298
+ 'type': 'RandContrast'
299
+ }, {
300
+ 'type': 'RandBrightness'
301
+ }, {
302
+ 'type': 'RandSharpness'
303
+ }, {
304
+ 'type': 'RandPosterize'
305
+ }]
306
+ }, {
307
+ 'type': 'Normalize',
308
+ 'mean': [123.675, 116.28, 103.53],
309
+ 'std': [58.395, 57.12, 57.375],
310
+ 'to_rgb': True
311
+ }, {
312
+ 'type': 'DefaultFormatBundle'
313
+ }, {
314
+ 'type': 'Collect',
315
+ 'keys': ['img', 'gt_bboxes', 'gt_labels']
316
+ }]]),
317
+ val=dict(
318
+ type='CocoDataset',
319
+ classes=['selective_search'],
320
+ ann_file='data/coco/annotations/instances_val2017.json',
321
+ img_prefix='data/coco/val2017/',
322
+ pipeline=[
323
+ dict(type='LoadImageFromFile'),
324
+ dict(
325
+ type='MultiScaleFlipAug',
326
+ img_scale=(1333, 800),
327
+ flip=False,
328
+ transforms=[
329
+ dict(type='Resize', keep_ratio=True),
330
+ dict(type='RandomFlip'),
331
+ dict(
332
+ type='Normalize',
333
+ mean=[123.675, 116.28, 103.53],
334
+ std=[58.395, 57.12, 57.375],
335
+ to_rgb=True),
336
+ dict(type='Pad', size_divisor=32),
337
+ dict(type='ImageToTensor', keys=['img']),
338
+ dict(type='Collect', keys=['img'])
339
+ ])
340
+ ]),
341
+ test=dict(
342
+ type='CocoDataset',
343
+ classes=['selective_search'],
344
+ ann_file='data/coco/annotations/instances_val2017.json',
345
+ img_prefix='data/coco/val2017/',
346
+ pipeline=[
347
+ dict(type='LoadImageFromFile'),
348
+ dict(
349
+ type='MultiScaleFlipAug',
350
+ img_scale=(1333, 800),
351
+ flip=False,
352
+ transforms=[
353
+ dict(type='Resize', keep_ratio=True),
354
+ dict(type='RandomFlip'),
355
+ dict(
356
+ type='Normalize',
357
+ mean=[123.675, 116.28, 103.53],
358
+ std=[58.395, 57.12, 57.375],
359
+ to_rgb=True),
360
+ dict(type='Pad', size_divisor=32),
361
+ dict(type='ImageToTensor', keys=['img']),
362
+ dict(type='Collect', keys=['img'])
363
+ ])
364
+ ]))
365
+ evaluation = dict(metric='bbox', interval=65535)
366
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
367
+ optimizer_config = dict(grad_clip=None)
368
+ lr_config = dict(
369
+ policy='step',
370
+ warmup='linear',
371
+ warmup_iters=500,
372
+ warmup_ratio=0.001,
373
+ step=[8, 11])
374
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
375
+ checkpoint_config = dict(interval=1)
376
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
377
+ custom_hooks = [
378
+ dict(type='MomentumUpdateHook'),
379
+ dict(
380
+ type='MMDetWandbHook',
381
+ init_kwargs=dict(project='mmdet_pretrain', group='pretrain'),
382
+ interval=50,
383
+ num_eval_images=0,
384
+ log_checkpoint=False)
385
+ ]
386
+ dist_params = dict(backend='nccl')
387
+ log_level = 'INFO'
388
+ load_from = None
389
+ resume_from = None
390
+ workflow = [('train', 1)]
391
+ opencv_num_threads = 0
392
+ mp_start_method = 'fork'
393
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
394
+ custom_imports = dict(
395
+ imports=[
396
+ 'mmselfsup.core', 'mmselfsup.datasets.pipelines',
397
+ 'selfsup.datasets.pipelines.selfsup_pipelines',
398
+ 'selfsup.datasets.pipelines.rand_aug',
399
+ 'selfsup.datasets.single_view_coco',
400
+ 'selfsup.datasets.multi_view_coco',
401
+ 'selfsup.models.losses.contrastive_loss',
402
+ 'selfsup.models.dense_heads.fcos_head',
403
+ 'selfsup.models.dense_heads.retina_head',
404
+ 'selfsup.models.dense_heads.detr_head',
405
+ 'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
406
+ 'selfsup.models.roi_heads.standard_roi_head',
407
+ 'selfsup.models.detectors.selfsup_detector',
408
+ 'selfsup.models.detectors.selfsup_fcos',
409
+ 'selfsup.models.detectors.selfsup_detr',
410
+ 'selfsup.models.detectors.selfsup_retinanet',
411
+ 'selfsup.models.detectors.selfsup_mask_rcnn',
412
+ 'selfsup.core.bbox.match_costs.match_cost'
413
+ ],
414
+ allow_failed_imports=False)
415
+ work_dir = 'work_dirs/selfsup_mask_rcnn_soft-teacher-mstrain_sampler-4096-1.0_temperature-0.2_1x_coco_pixpro-pretrain'
416
+ auto_resume = False
417
+ gpu_ids = range(0, 8)