kuldeepbarad committed
Commit 142877e
1 Parent(s): 27b0d74

Upload 8 files


add pre-trained checkpoints for full and partial point clouds
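For reference, a minimal sketch of fetching one of these LFS-backed checkpoints with the huggingface_hub client (the repo id below is a placeholder for whichever Hub repo this commit belongs to):

    # Minimal sketch: resolve one of the uploaded checkpoints to a local file.
    # `repo_id` is a placeholder; substitute the actual Hub repo for this commit.
    from huggingface_hub import hf_hub_download

    ckpt_path = hf_hub_download(
        repo_id="user/repo",  # placeholder
        filename="checkpoints/generation/fpc_1a_latentc3_z4_pc64/ddm/checkpoints/last.ckpt",
    )
    print(ckpt_path)  # local path to the downloaded checkpoint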

checkpoints/generation/fpc_1a_latentc3_z4_pc64/ddm/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30d5224635de058c47919ceb81ba57e2a4b311b063660e6e9ea914e216dbcbc8
+size 47899897
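Note that the three `+` lines above are a Git LFS pointer, not the checkpoint itself: `oid` is the SHA-256 digest of the actual file and `size` is its byte count. A small parsing sketch, following the spec URL named in the pointer:

    # Sketch: split a Git LFS pointer into its key/value fields.
    def parse_lfs_pointer(text: str) -> dict:
        fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
        return {
            "version": fields["version"],
            "oid": fields["oid"].removeprefix("sha256:"),  # bare SHA-256 digest
            "size": int(fields["size"]),                   # file size in bytes
        }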
checkpoints/generation/fpc_1a_latentc3_z4_pc64/ddm/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py ADDED
@@ -0,0 +1,289 @@
+import os
+
+## -------------------- Most frequently changed params here --------------------
+
+resume_training_from_last = True
+
+max_steps = 180000
+batch_size = 10
+
+num_gpus = 1
+num_workers_per_gpu = 7
+
+# During training, if a ckpt is provided here, it overrides resume_training_from_last and training resumes from this ckpt instead
+vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
+ddm_ckpt_path = None
+
+max_scenes = None
+
+## -------------------- Inputs/Shapes ------------------------
+# Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
+
+pc_num_points = 1024
+pc_latent_dims = 64
+pc_latent_channels = 3
+
+grasp_pose_dims = 6
+num_output_qualities = 0
+grasp_latent_dims = 4
+
+grasp_representation_dims = (
+    grasp_pose_dims + num_output_qualities + 1
+    if num_output_qualities is not None
+    else grasp_pose_dims + 1
+)
+
+## ----------------------- Model -----------------------
+
+dropout = 0.1  # or None
+
+pc_encoder_config = dict(
+    type="PVCNNEncoder",
+    args=dict(
+        in_features=3,
+        n_points=pc_num_points,
+        scale_channels=0.75,
+        scale_voxel_resolution=0.75,
+        num_blocks=(1, 1, 1, 1),
+        out_channels=pc_latent_channels,
+        use_global_attention=False,
+    ),
+)
+
+grasp_encoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        in_features=grasp_representation_dims,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+decoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        block_channels=(32, 64, 128, 256),
+        # out_dim=grasp_pose_dims,
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+loss_config = dict(
+    reconstruction_loss=dict(
+        type="GraspReconstructionLoss",
+        name="reconstruction_loss",
+        args=dict(translation_weight=1, rotation_weight=1),
+    ),
+    latent_loss=dict(
+        type="VAELatentLoss",
+        args=dict(
+            name="grasp_latent",
+            cyclical_annealing=True,
+            num_steps=max_steps,
+            num_cycles=1,
+            ratio=0.5,
+            start=1e-7,
+            stop=0.1,
+        ),
+    ),
+    classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
+    # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
+)
+
+denoiser_model = dict(
+    type="TimeConditionedResNet1D",
+    args=dict(
+        dim=grasp_latent_dims,
+        channels=1,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+        is_time_conditioned=True,
+        learned_variance=False,
+        learned_sinusoidal_cond=False,
+        random_fourier_features=True,
+        # learned_sinusoidal_dim=16,
+    ),
+)
+# Use `model` when a single module is to be built; use `models` when several modules must be built.
+# See models/builder.py for more info.
+model = dict(
+    vae=dict(
+        model=dict(
+            type="GraspCVAE",
+            args=dict(
+                grasp_latent_size=grasp_latent_dims,
+                pc_latent_size=pc_latent_dims,
+                pc_encoder_config=pc_encoder_config,
+                grasp_encoder_config=grasp_encoder_config,
+                decoder_config=decoder_config,
+                loss_config=loss_config,
+                num_output_qualities=num_output_qualities,
+                intermediate_feature_resolution=16,
+            ),
+        ),
+        ckpt_path=vae_ckpt_path,
+    ),
+    ddm=dict(
+        model=dict(
+            type="GraspLatentDDM",
+            args=dict(
+                model=denoiser_model,
+                latent_in_features=grasp_latent_dims,
+                diffusion_timesteps=1000,
+                noise_scheduler_type="ddpm",
+                diffusion_loss="l2",
+                beta_schedule="linear",
+                is_conditioned=True,
+                joint_training=False,
+                denoising_loss_weight=1,
+                variance_type="fixed_large",
+                elucidated_diffusion=False,
+                beta_start=0.00005,
+                beta_end=0.001,
+            ),
+        ),
+        ckpt_path=ddm_ckpt_path,
+        use_vae_ema_model=True,
+    ),
+)
+## -- Data --
+augs_config = [
+    dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
+    dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
+    dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
+]
+
+root_data_dir = "/mnt/irisgpfs/projects/mis-urso/grasp/data/acronym"
+object_categories = [
+    "Cup",
+    "Mug",
+    "Fork",
+    "Hat",
+    "Bottle",
+    "Bowl",
+    "Car",
+    "Donut",
+    "Laptop",
+    "MousePad",
+    "Pencil",
+    "Plate",
+    "ScrewDriver",
+    "WineBottle",
+    "Backpack",
+    "Bag",
+    "Banana",
+    "Battery",
+    "BeanBag",
+    "Bear",
+    "Book",
+    "Books",
+    "Camera",
+    "CerealBox",
+    "Cookie",
+    "Hammer",
+    "Hanger",
+    "Knife",
+    "MilkCarton",
+    "Painting",
+    "PillBottle",
+    "Plant",
+    "PowerSocket",
+    "PowerStrip",
+    "PS3",
+    "PSP",
+    "Ring",
+    "Scissors",
+    "Shampoo",
+    "Shoes",
+    "Sheep",
+    "Shower",
+    "Sink",
+    "SoapBottle",
+    "SodaCan",
+    "Spoon",
+    "Statue",
+    "Teacup",
+    "Teapot",
+    "ToiletPaper",
+    "ToyFigure",
+    "Wallet",
+    "WineGlass",
+    "Cow",
+    "Cat",
+    "Dog",
+    "Pizza",
+    "Elephant",
+    "Donkey",
+    "RubiksCube",
+    "Tank",
+    "Truck",
+    "USBStick",
+]
+
+train_data = dict(
+    type="AcronymShapenetPointclouds",
+    args=dict(
+        data_root_dir=root_data_dir,
+        batch_num_points_per_pc=pc_num_points,
+        batch_num_grasps_per_pc=100,
+        rotation_repr="mrp",
+        augs_config=augs_config,
+        split="train",
+        batch_failed_grasps_ratio=0,
+        use_dataset_statistics_for_norm=False,
+        filter_categories=object_categories,
+        load_fixed_subset_grasps_per_obj=None,
+        num_repeat_dataset=10,
+    ),
+)
+
+data = dict(
+    train=train_data,
+)
+
+# Patch: Mesh Categories. Used for simulation
+mesh_root = "/home/kuldeep/phd/data/ACRONYM/"
+mesh_root = (
+    mesh_root
+    if os.path.exists(mesh_root)
+    else "/mnt/irisgpfs/users/kbarad/grasp/data/acronym"
+)
+mesh_categories = object_categories
+
+## -------------------- Trainer --------------------
+## Logger
+logger = dict(type="WandbLogger", project="full-pc-ema-63c")
+
+optimizer = dict(
+    initial_lr=0.001,
+    scheduler=dict(
+        type="MultiStepLR",
+        args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
+    ),
+)
+
+trainer = dict(
+    max_steps=max_steps,
+    batch_size=batch_size,
+    num_workers=num_workers_per_gpu * num_gpus,
+    accelerator="gpu",
+    devices=num_gpus,
+    strategy="ddp",
+    logger=logger,
+    log_every_n_steps=100,
+    optimizer=optimizer,
+    resume_training_from_last=resume_training_from_last,
+    check_val_every_n_epoch=1,
+    ema=dict(
+        beta=0.990,
+        update_after_step=1000,
+    ),
+    deterministic=True,
+)
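The `dict(type=..., args=...)` entries in this config follow a registry-style builder convention; the resolution logic lives in models/builder.py, which is not part of this commit, so the sketch below is an assumption of how such configs are typically instantiated:

    # Hypothetical registry-style builder for dict(type=..., args=...) configs.
    # MODEL_REGISTRY is a stand-in; the real mapping lives in models/builder.py.
    MODEL_REGISTRY: dict = {}  # e.g. {"PVCNNEncoder": PVCNNEncoder, "ResNet1D": ResNet1D}

    def build_from_config(cfg: dict):
        cls = MODEL_REGISTRY[cfg["type"]]  # look up the class named by "type"
        return cls(**cfg.get("args", {}))  # instantiate it with the "args" dict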
checkpoints/generation/fpc_1a_latentc3_z4_pc64/vae/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c6759f52c2fe0895bd54fee24425f720fa4d67fd8454e25cb6e338a03b05d7
+size 40291309
checkpoints/generation/fpc_1a_latentc3_z4_pc64/vae/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py ADDED
@@ -0,0 +1,289 @@
+import os
+
+## -------------------- Most frequently changed params here --------------------
+
+resume_training_from_last = True
+
+max_steps = 180000
+batch_size = 10
+
+num_gpus = 1
+num_workers_per_gpu = 7
+
+# During training, if a ckpt is provided here, it overrides resume_training_from_last and training resumes from this ckpt instead
+vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
+ddm_ckpt_path = None
+
+max_scenes = None
+
+## -------------------- Inputs/Shapes ------------------------
+# Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
+
+pc_num_points = 1024
+pc_latent_dims = 64
+pc_latent_channels = 3
+
+grasp_pose_dims = 6
+num_output_qualities = 0
+grasp_latent_dims = 4
+
+grasp_representation_dims = (
+    grasp_pose_dims + num_output_qualities + 1
+    if num_output_qualities is not None
+    else grasp_pose_dims + 1
+)
+
+## ----------------------- Model -----------------------
+
+dropout = 0.1  # or None
+
+pc_encoder_config = dict(
+    type="PVCNNEncoder",
+    args=dict(
+        in_features=3,
+        n_points=pc_num_points,
+        scale_channels=0.75,
+        scale_voxel_resolution=0.75,
+        num_blocks=(1, 1, 1, 1),
+        out_channels=pc_latent_channels,
+        use_global_attention=False,
+    ),
+)
+
+grasp_encoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        in_features=grasp_representation_dims,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+decoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        block_channels=(32, 64, 128, 256),
+        # out_dim=grasp_pose_dims,
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+loss_config = dict(
+    reconstruction_loss=dict(
+        type="GraspReconstructionLoss",
+        name="reconstruction_loss",
+        args=dict(translation_weight=1, rotation_weight=1),
+    ),
+    latent_loss=dict(
+        type="VAELatentLoss",
+        args=dict(
+            name="grasp_latent",
+            cyclical_annealing=True,
+            num_steps=max_steps,
+            num_cycles=1,
+            ratio=0.5,
+            start=1e-7,
+            stop=0.1,
+        ),
+    ),
+    classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
+    # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
+)
+
+denoiser_model = dict(
+    type="TimeConditionedResNet1D",
+    args=dict(
+        dim=grasp_latent_dims,
+        channels=1,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+        is_time_conditioned=True,
+        learned_variance=False,
+        learned_sinusoidal_cond=False,
+        random_fourier_features=True,
+        # learned_sinusoidal_dim=16,
+    ),
+)
+# Use `model` when a single module is to be built; use `models` when several modules must be built.
+# See models/builder.py for more info.
+model = dict(
+    vae=dict(
+        model=dict(
+            type="GraspCVAE",
+            args=dict(
+                grasp_latent_size=grasp_latent_dims,
+                pc_latent_size=pc_latent_dims,
+                pc_encoder_config=pc_encoder_config,
+                grasp_encoder_config=grasp_encoder_config,
+                decoder_config=decoder_config,
+                loss_config=loss_config,
+                num_output_qualities=num_output_qualities,
+                intermediate_feature_resolution=16,
+            ),
+        ),
+        ckpt_path=vae_ckpt_path,
+    ),
+    ddm=dict(
+        model=dict(
+            type="GraspLatentDDM",
+            args=dict(
+                model=denoiser_model,
+                latent_in_features=grasp_latent_dims,
+                diffusion_timesteps=1000,
+                noise_scheduler_type="ddpm",
+                diffusion_loss="l2",
+                beta_schedule="linear",
+                is_conditioned=True,
+                joint_training=False,
+                denoising_loss_weight=1,
+                variance_type="fixed_large",
+                elucidated_diffusion=False,
+                beta_start=0.00005,
+                beta_end=0.001,
+            ),
+        ),
+        ckpt_path=ddm_ckpt_path,
+        use_vae_ema_model=True,
+    ),
+)
+## -- Data --
+augs_config = [
+    dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
+    dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
+    dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
+]
+
+root_data_dir = "/mnt/irisgpfs/projects/mis-urso/grasp/data/acronym"
+object_categories = [
+    "Cup",
+    "Mug",
+    "Fork",
+    "Hat",
+    "Bottle",
+    "Bowl",
+    "Car",
+    "Donut",
+    "Laptop",
+    "MousePad",
+    "Pencil",
+    "Plate",
+    "ScrewDriver",
+    "WineBottle",
+    "Backpack",
+    "Bag",
+    "Banana",
+    "Battery",
+    "BeanBag",
+    "Bear",
+    "Book",
+    "Books",
+    "Camera",
+    "CerealBox",
+    "Cookie",
+    "Hammer",
+    "Hanger",
+    "Knife",
+    "MilkCarton",
+    "Painting",
+    "PillBottle",
+    "Plant",
+    "PowerSocket",
+    "PowerStrip",
+    "PS3",
+    "PSP",
+    "Ring",
+    "Scissors",
+    "Shampoo",
+    "Shoes",
+    "Sheep",
+    "Shower",
+    "Sink",
+    "SoapBottle",
+    "SodaCan",
+    "Spoon",
+    "Statue",
+    "Teacup",
+    "Teapot",
+    "ToiletPaper",
+    "ToyFigure",
+    "Wallet",
+    "WineGlass",
+    "Cow",
+    "Cat",
+    "Dog",
+    "Pizza",
+    "Elephant",
+    "Donkey",
+    "RubiksCube",
+    "Tank",
+    "Truck",
+    "USBStick",
+]
+
+train_data = dict(
+    type="AcronymShapenetPointclouds",
+    args=dict(
+        data_root_dir=root_data_dir,
+        batch_num_points_per_pc=pc_num_points,
+        batch_num_grasps_per_pc=100,
+        rotation_repr="mrp",
+        augs_config=augs_config,
+        split="train",
+        batch_failed_grasps_ratio=0,
+        use_dataset_statistics_for_norm=False,
+        filter_categories=object_categories,
+        load_fixed_subset_grasps_per_obj=None,
+        num_repeat_dataset=10,
+    ),
+)
+
+data = dict(
+    train=train_data,
+)
+
+# Patch: Mesh Categories. Used for simulation
+mesh_root = "/home/kuldeep/phd/data/ACRONYM/"
+mesh_root = (
+    mesh_root
+    if os.path.exists(mesh_root)
+    else "/mnt/irisgpfs/users/kbarad/grasp/data/acronym"
+)
+mesh_categories = object_categories
+
+## -------------------- Trainer --------------------
+## Logger
+logger = dict(type="WandbLogger", project="full-pc-ema-63c")
+
+optimizer = dict(
+    initial_lr=0.001,
+    scheduler=dict(
+        type="MultiStepLR",
+        args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
+    ),
+)
+
+trainer = dict(
+    max_steps=max_steps,
+    batch_size=batch_size,
+    num_workers=num_workers_per_gpu * num_gpus,
+    accelerator="gpu",
+    devices=num_gpus,
+    strategy="ddp",
+    logger=logger,
+    log_every_n_steps=100,
+    optimizer=optimizer,
+    resume_training_from_last=resume_training_from_last,
+    check_val_every_n_epoch=1,
+    ema=dict(
+        beta=0.990,
+        update_after_step=1000,
+    ),
+    deterministic=True,
+)
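The `mrp(3)` term in the grasp-representation comment above denotes Modified Rodrigues Parameters, a 3-vector rotation encoding. A self-contained sketch of the standard quaternion-to-MRP conversion (the dataset's exact sign/ordering conventions are not shown in this commit, so treat them as assumptions):

    import numpy as np

    def quat_to_mrp(q: np.ndarray) -> np.ndarray:
        # Unit quaternion (w, x, y, z) -> MRP p = v / (1 + w) = axis * tan(theta / 4).
        w, v = q[0], q[1:]
        if w < 0:  # flip to the antipodal quaternion to avoid the w = -1 singularity
            w, v = -w, -v
        return v / (1.0 + w)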
checkpoints/generation/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k/ddm/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb9d2683e0d750df0c163b4ad4d5d9288ec9d7bb2b4246d11f90250dd856f28
+size 25175359
checkpoints/generation/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k/ddm/exp16e3_partial_63cat8k_filtered_latentc3_z16_pc256_simple_180k.py ADDED
@@ -0,0 +1,225 @@
+import os
+
+# Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
+grasp_pose_dims = 6
+num_output_qualities = 0
+
+grasp_representation_dims = (
+    grasp_pose_dims + num_output_qualities + 1
+    if num_output_qualities is not None
+    else grasp_pose_dims + 1
+)
+
+grasp_latent_dims = 16
+pc_latent_dims = 256
+pc_latent_channels = 3
+
+pc_num_points = 1024
+batch_num_scenes = 10
+# max_scenes = 10
+
+# Max training steps or epochs. Only one of them should be defined; if both are, steps will be considered.
+max_steps = 180000
+max_epochs = None
+
+## Checkpoints:
+# Whether to auto-check the exp directory and resume from the last saved checkpoint
+resume_training_from_last = True
+
+# TODO: Not passed in config.
+save_ckpt_every_n_epochs = 50
+# During training, if a ckpt is provided here, it overrides resume_training_from_last and training resumes from this ckpt instead
+vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
+ddm_ckpt_path = None
+
+
+## -- Model --
+dropout = 0.1  # or None
+
+pc_encoder_config = dict(
+    type="PVCNNEncoder",
+    args=dict(
+        in_features=3,
+        n_points=pc_num_points,
+        scale_channels=0.75,
+        scale_voxel_resolution=0.75,
+        num_blocks=(1, 1, 1, 1),
+        out_channels=pc_latent_channels,
+        use_global_attention=False,
+    ),
+)
+
+grasp_encoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        in_features=grasp_representation_dims,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+decoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        block_channels=(32, 64, 128, 256),
+        # out_dim=grasp_pose_dims,
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+loss_config = dict(
+    reconstruction_loss=dict(
+        type="GraspReconstructionLoss",
+        name="reconstruction_loss",
+        args=dict(translation_weight=1, rotation_weight=1),
+    ),
+    latent_loss=dict(
+        type="VAELatentLoss",
+        args=dict(
+            name="grasp_latent",
+            cyclical_annealing=True,
+            num_steps=max_steps,
+            num_cycles=1,
+            ratio=0.5,
+            start=1e-7,
+            stop=0.1,
+        ),
+    ),
+    classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
+    # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
+)
+
+denoiser_model = dict(
+    type="TimeConditionedResNet1D",
+    args=dict(
+        dim=grasp_latent_dims,
+        channels=1,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+        is_time_conditioned=True,
+        learned_variance=False,
+        learned_sinusoidal_cond=False,
+        random_fourier_features=True,
+        # learned_sinusoidal_dim=16,
+    ),
+)
+# Use `model` when a single module is to be built; use `models` when several modules must be built.
+# See models/builder.py for more info.
+models = dict(
+    vae=dict(
+        model=dict(
+            type="GraspCVAE",
+            args=dict(
+                grasp_latent_size=grasp_latent_dims,
+                pc_latent_size=pc_latent_dims,
+                pc_encoder_config=pc_encoder_config,
+                grasp_encoder_config=grasp_encoder_config,
+                decoder_config=decoder_config,
+                loss_config=loss_config,
+                num_output_qualities=num_output_qualities,
+                intermediate_feature_resolution=16,
+            ),
+        ),
+        ckpt_path=vae_ckpt_path,
+    ),
+    ddm=dict(
+        model=dict(
+            type="GraspLatentDDM",
+            args=dict(
+                model=denoiser_model,
+                latent_in_features=grasp_latent_dims,
+                diffusion_timesteps=1000,
+                noise_scheduler_type="ddpm",
+                diffusion_loss="l2",
+                beta_schedule="linear",
+                is_conditioned=True,
+                joint_training=False,
+                denoising_loss_weight=1,
+                variance_type="fixed_large",
+                elucidated_diffusion=False,
+                beta_start=0.00005,
+                beta_end=0.001,
+            ),
+        ),
+        ckpt_path=ddm_ckpt_path,
+    ),
+)
+## -- Data --
+augs_config = [
+    dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
+    dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
+    dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
+]
+
+root_data_dir = (
+    "/mnt/irisgpfs/projects/mis-urso/grasp/data/acronym/renders/objects_filtered_grasps_63cat_8k/"
+)
+camera_json = "data/cameras/camera_d435i_dummy.json"
+max_scenes = None
+train_data = dict(
+    type="AcronymPartialPointclouds",
+    args=dict(
+        data_root_dir=root_data_dir,
+        max_scenes=max_scenes,
+        camera_json=camera_json,
+        num_points_per_pc=pc_num_points,
+        num_grasps_per_obj=100,
+        rotation_repr="mrp",
+        augs_config=augs_config,
+        split="train",
+        depth_px_scale=10000,
+        scene_prefix="scene_",
+        min_usable_pc_points=1024,
+        preempt_load_data=True,
+        use_failed_grasps=False,
+        failed_grasp_ratio=0.3,
+        load_fixed_grasp_transforms=None,
+        is_input_dataset_normalized=False,
+    ),
+)
+
+data = dict(
+    train=train_data,
+)
+
+# Patch: Mesh Categories. Used for simulation
+mesh_root = "/home/kuldeep/phd/data/ACRONYM/"
+mesh_root = (
+    mesh_root
+    if os.path.exists(mesh_root)
+    else "/mnt/irisgpfs/users/kbarad/grasp/data/acronym"
+)
+mesh_categories = ["Cup", "Mug", "Fork", "Hat", "Bottle", "Bowl", "Car", "Donut", "Laptop", "MousePad", "Pencil", "Plate", "ScrewDriver", "WineBottle", "Backpack", "Bag", "Banana", "Battery", "BeanBag", "Bear", "Book", "Books", "Camera", "CerealBox", "Cookie", "Hammer", "Hanger", "Knife", "MilkCarton", "Painting", "PillBottle", "Plant", "PowerSocket", "PowerStrip", "PS3", "PSP", "Ring", "Scissors", "Shampoo", "Shoes", "Sheep", "Shower", "Sink", "SoapBottle", "SodaCan", "Spoon", "Statue", "Teacup", "Teapot", "ToiletPaper", "ToyFigure", "Wallet", "WineGlass", "Cow", "Cat", "Dog", "Pizza", "Elephant", "Donkey", "RubiksCube", "Tank", "Truck", "USBStick"]
+
+## Logger
+logger = dict(type="WandbLogger", project="partial-pc-baseline")
+
+optimizer = dict(
+    initial_lr=0.001,
+    scheduler=dict(
+        type="MultiStepLR",
+        args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
+    ),
+)
+
+num_gpus = 1
+
+
+steps_or_epochs = (
+    dict(max_steps=max_steps) if max_steps is not None else dict(max_epochs=max_epochs)
+)
+
+train = dict(
+    **steps_or_epochs,
+    batch_size=batch_num_scenes,
+    num_workers=7 * num_gpus,
+    accelerator="gpu",
+    devices=num_gpus,
+    strategy="ddp",
+)
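The `VAELatentLoss` entry above anneals the KL weight cyclically from `start=1e-7` to `stop=0.1` over `num_steps`, with `num_cycles=1` and `ratio=0.5`. The implementation is not part of this commit; a sketch of the standard cyclical annealing schedule these parameters suggest (parameter meanings are assumptions):

    def kl_weight(step, num_steps=180000, num_cycles=1, ratio=0.5, start=1e-7, stop=0.1):
        # Ramp linearly from `start` to `stop` over the first `ratio` fraction
        # of each cycle, then hold at `stop` for the remainder of the cycle.
        period = num_steps / num_cycles
        t = (step % period) / period  # position within the current cycle, in [0, 1)
        return start + (stop - start) * min(t / ratio, 1.0)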
checkpoints/generation/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k/vae/checkpoints/last.ckpt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f0e017b0790fcb742797dbd6c3a31cb9c48ba62020ebffd08408c3f395d44e7
+size 20985977
checkpoints/generation/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k/vae/exp16e3_partial_63cat8k_filtered_latentc3_z16_pc256_simple_180k.py ADDED
@@ -0,0 +1,225 @@
+import os
+
+# Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
+grasp_pose_dims = 6
+num_output_qualities = 0
+
+grasp_representation_dims = (
+    grasp_pose_dims + num_output_qualities + 1
+    if num_output_qualities is not None
+    else grasp_pose_dims + 1
+)
+
+grasp_latent_dims = 16
+pc_latent_dims = 256
+pc_latent_channels = 3
+
+pc_num_points = 1024
+batch_num_scenes = 10
+# max_scenes = 10
+
+# Max training steps or epochs. Only one of them should be defined; if both are, steps will be considered.
+max_steps = 180000
+max_epochs = None
+
+## Checkpoints:
+# Whether to auto-check the exp directory and resume from the last saved checkpoint
+resume_training_from_last = True
+
+# TODO: Not passed in config.
+save_ckpt_every_n_epochs = 50
+# During training, if a ckpt is provided here, it overrides resume_training_from_last and training resumes from this ckpt instead
+vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
+ddm_ckpt_path = None
+
+
+## -- Model --
+dropout = 0.1  # or None
+
+pc_encoder_config = dict(
+    type="PVCNNEncoder",
+    args=dict(
+        in_features=3,
+        n_points=pc_num_points,
+        scale_channels=0.75,
+        scale_voxel_resolution=0.75,
+        num_blocks=(1, 1, 1, 1),
+        out_channels=pc_latent_channels,
+        use_global_attention=False,
+    ),
+)
+
+grasp_encoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        in_features=grasp_representation_dims,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+decoder_config = dict(
+    type="ResNet1D",
+    args=dict(
+        block_channels=(32, 64, 128, 256),
+        # out_dim=grasp_pose_dims,
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+    ),
+)
+
+loss_config = dict(
+    reconstruction_loss=dict(
+        type="GraspReconstructionLoss",
+        name="reconstruction_loss",
+        args=dict(translation_weight=1, rotation_weight=1),
+    ),
+    latent_loss=dict(
+        type="VAELatentLoss",
+        args=dict(
+            name="grasp_latent",
+            cyclical_annealing=True,
+            num_steps=max_steps,
+            num_cycles=1,
+            ratio=0.5,
+            start=1e-7,
+            stop=0.1,
+        ),
+    ),
+    classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
+    # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
+)
+
+denoiser_model = dict(
+    type="TimeConditionedResNet1D",
+    args=dict(
+        dim=grasp_latent_dims,
+        channels=1,
+        block_channels=(32, 64, 128, 256),
+        input_conditioning_dims=pc_latent_dims,
+        resnet_block_groups=4,
+        dropout=dropout,
+        is_time_conditioned=True,
+        learned_variance=False,
+        learned_sinusoidal_cond=False,
+        random_fourier_features=True,
+        # learned_sinusoidal_dim=16,
+    ),
+)
+# Use `model` when a single module is to be built; use `models` when several modules must be built.
+# See models/builder.py for more info.
+models = dict(
+    vae=dict(
+        model=dict(
+            type="GraspCVAE",
+            args=dict(
+                grasp_latent_size=grasp_latent_dims,
+                pc_latent_size=pc_latent_dims,
+                pc_encoder_config=pc_encoder_config,
+                grasp_encoder_config=grasp_encoder_config,
+                decoder_config=decoder_config,
+                loss_config=loss_config,
+                num_output_qualities=num_output_qualities,
+                intermediate_feature_resolution=16,
+            ),
+        ),
+        ckpt_path=vae_ckpt_path,
+    ),
+    ddm=dict(
+        model=dict(
+            type="GraspLatentDDM",
+            args=dict(
+                model=denoiser_model,
+                latent_in_features=grasp_latent_dims,
+                diffusion_timesteps=1000,
+                noise_scheduler_type="ddpm",
+                diffusion_loss="l2",
+                beta_schedule="linear",
+                is_conditioned=True,
+                joint_training=False,
+                denoising_loss_weight=1,
+                variance_type="fixed_large",
+                elucidated_diffusion=False,
+                beta_start=0.00005,
+                beta_end=0.001,
+            ),
+        ),
+        ckpt_path=ddm_ckpt_path,
+    ),
+)
+## -- Data --
+augs_config = [
+    dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
+    dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
+    dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
+]
+
+root_data_dir = (
+    "/mnt/irisgpfs/projects/mis-urso/grasp/data/acronym/renders/objects_filtered_grasps_63cat_8k/"
+)
+camera_json = "data/cameras/camera_d435i_dummy.json"
+max_scenes = None
+train_data = dict(
+    type="AcronymPartialPointclouds",
+    args=dict(
+        data_root_dir=root_data_dir,
+        max_scenes=max_scenes,
+        camera_json=camera_json,
+        num_points_per_pc=pc_num_points,
+        num_grasps_per_obj=100,
+        rotation_repr="mrp",
+        augs_config=augs_config,
+        split="train",
+        depth_px_scale=10000,
+        scene_prefix="scene_",
+        min_usable_pc_points=1024,
+        preempt_load_data=True,
+        use_failed_grasps=False,
+        failed_grasp_ratio=0.3,
+        load_fixed_grasp_transforms=None,
+        is_input_dataset_normalized=False,
+    ),
+)
+
+data = dict(
+    train=train_data,
+)
+
+# Patch: Mesh Categories. Used for simulation
+mesh_root = "/home/kuldeep/phd/data/ACRONYM/"
+mesh_root = (
+    mesh_root
+    if os.path.exists(mesh_root)
+    else "/mnt/irisgpfs/users/kbarad/grasp/data/acronym"
+)
+mesh_categories = ["Cup", "Mug", "Fork", "Hat", "Bottle", "Bowl", "Car", "Donut", "Laptop", "MousePad", "Pencil", "Plate", "ScrewDriver", "WineBottle", "Backpack", "Bag", "Banana", "Battery", "BeanBag", "Bear", "Book", "Books", "Camera", "CerealBox", "Cookie", "Hammer", "Hanger", "Knife", "MilkCarton", "Painting", "PillBottle", "Plant", "PowerSocket", "PowerStrip", "PS3", "PSP", "Ring", "Scissors", "Shampoo", "Shoes", "Sheep", "Shower", "Sink", "SoapBottle", "SodaCan", "Spoon", "Statue", "Teacup", "Teapot", "ToiletPaper", "ToyFigure", "Wallet", "WineGlass", "Cow", "Cat", "Dog", "Pizza", "Elephant", "Donkey", "RubiksCube", "Tank", "Truck", "USBStick"]
+
+## Logger
+logger = dict(type="WandbLogger", project="partial-pc-baseline")
+
+optimizer = dict(
+    initial_lr=0.001,
+    scheduler=dict(
+        type="MultiStepLR",
+        args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
+    ),
+)
+
+num_gpus = 1
+
+
+steps_or_epochs = (
+    dict(max_steps=max_steps) if max_steps is not None else dict(max_epochs=max_epochs)
+)
+
+train = dict(
+    **steps_or_epochs,
+    batch_size=batch_num_scenes,
+    num_workers=7 * num_gpus,
+    accelerator="gpu",
+    devices=num_gpus,
+    strategy="ddp",
+)
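The DDM block in these configs (a "ddpm" scheduler with linear betas from 5e-5 to 1e-3 over 1000 timesteps and "fixed_large" variance) maps directly onto a standard DDPM noise scheduler. A sketch using diffusers.DDPMScheduler; whether GraspLatentDDM actually wraps the diffusers library is an assumption:

    from diffusers import DDPMScheduler

    # Noise scheduler matching the GraspLatentDDM args in these configs.
    scheduler = DDPMScheduler(
        num_train_timesteps=1000,     # diffusion_timesteps
        beta_start=0.00005,
        beta_end=0.001,
        beta_schedule="linear",
        variance_type="fixed_large",  # matches variance_type in the config
    )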