LN3Diff_I23D / configs /i23d_args.json
NIRVANALAN
update
127ac36
raw
history blame
7.49 kB
{
"image_size_encoder": 256,
"triplane_scaling_divider": 0.96806,
"diffusion_input_size": 32,
"trainer_name": "flow_matching",
"use_amp": true,
"clip_denoised": false,
"num_samples": 1,
"num_instances": 10,
"use_ddim": false,
"ddpm_model_path": "",
"cldm_model_path": "",
"rec_model_path": "",
"logdir": "./",
"data_dir": "NONE",
"eval_data_dir": "/cpfs01/user/lanyushi.p/Repo/eccv24/open-source/InstantMesh/test_dir",
"eval_batch_size": 1,
"num_workers": 0,
"overfitting": false,
"image_size": 256,
"iterations": 5000001,
"schedule_sampler": "uniform",
"anneal_lr": false,
"lr": 2e-05,
"weight_decay": 0.05,
"lr_anneal_steps": 0,
"batch_size": 1,
"microbatch": 1,
"ema_rate": "0.9999",
"log_interval": 50,
"eval_interval": 5000,
"save_interval": 10000,
"resume_checkpoint": "checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors",
"resume_cldm_checkpoint": "",
"resume_checkpoint_EG3D": "",
"use_fp16": false,
"fp16_scale_growth": 0.001,
"load_submodule_name": "",
"ignore_resume_opt": false,
"freeze_ae": false,
"denoised_ae": true,
"prompt": "a red chair",
"interval": 5,
"save_img": false,
"use_train_trajectory": false,
"unconditional_guidance_scale": 6.5,
"use_eos_feature": false,
"export_mesh": false,
"cond_key": "img",
"allow_tf32": true,
"num_channels": 320,
"num_res_blocks": 2,
"num_heads": 8,
"num_heads_upsample": -1,
"num_head_channels": -1,
"attention_resolutions": "4,2,1",
"channel_mult": "",
"dropout": 0.0,
"class_cond": false,
"use_checkpoint": false,
"use_scale_shift_norm": true,
"resblock_updown": false,
"use_new_attention_order": false,
"denoise_in_channels": 4,
"denoise_out_channels": 4,
"create_controlnet": false,
"create_dit": true,
"i23d": true,
"create_unet_with_hint": false,
"dit_model_arch": "DiT-PixArt-L/2",
"use_spatial_transformer": true,
"transformer_depth": 1,
"context_dim": 1024,
"pooling_ctx_dim": 768,
"roll_out": true,
"n_embed": null,
"legacy": true,
"mixing_logit_init": -6,
"hint_channels": 3,
"learn_sigma": false,
"diffusion_steps": 1000,
"noise_schedule": "linear",
"standarization_xt": false,
"timestep_respacing": "",
"use_kl": false,
"predict_xstart": false,
"predict_v": true,
"rescale_timesteps": false,
"rescale_learned_sigmas": false,
"mixed_prediction": false,
"dino_version": "mv-sd-dit-dynaInp-trilatent",
"encoder_in_channels": 10,
"img_size": [
256
],
"patch_size": 14,
"in_chans": 384,
"num_classes": 0,
"embed_dim": 384,
"depth": 6,
"mlp_ratio": 4.0,
"qkv_bias": false,
"qk_scale": null,
"drop_rate": 0.1,
"attn_drop_rate": 0.0,
"drop_path_rate": 0.0,
"norm_layer": "nn.LayerNorm",
"cls_token": false,
"encoder_cls_token": false,
"decoder_cls_token": false,
"sr_kwargs": {},
"sr_ratio": 2,
"use_clip": false,
"arch_encoder": "vits",
"arch_decoder": "vitb",
"load_pretrain_encoder": false,
"encoder_lr": 1e-05,
"encoder_weight_decay": 0.001,
"no_dim_up_mlp": true,
"dim_up_mlp_as_func": false,
"decoder_load_pretrained": false,
"uvit_skip_encoder": true,
"vae_p": 2,
"ldm_z_channels": 4,
"ldm_embed_dim": 4,
"use_conf_map": false,
"sd_E_ch": 64,
"z_channels": 12,
"sd_E_num_res_blocks": 1,
"num_frames": 6,
"arch_dit_decoder": "DiT2-L/2",
"return_all_dit_layers": false,
"lrm_decoder": false,
"plane_n": 3,
"gs_rendering": false,
"decomposed": true,
"triplane_fg_bg": false,
"cfg": "objverse_tuneray_aug_resolution_64_64_auto",
"density_reg": 0.0,
"density_reg_p_dist": 0.004,
"reg_type": "l1",
"triplane_decoder_lr": 5e-05,
"super_resolution_lr": 5e-05,
"c_scale": 1,
"nsr_lr": 0.02,
"triplane_size": 224,
"decoder_in_chans": 32,
"triplane_in_chans": 32,
"decoder_output_dim": 3,
"out_chans": 96,
"c_dim": 25,
"ray_start": 0.6,
"ray_end": 1.8,
"rendering_kwargs": {
"image_resolution": 256,
"disparity_space_sampling": false,
"clamp_mode": "softplus",
"c_gen_conditioning_zero": true,
"c_scale": 1,
"superresolution_noise_mode": "none",
"density_reg": 0.0,
"density_reg_p_dist": 0.004,
"reg_type": "l1",
"decoder_lr_mul": 1,
"decoder_activation": "sigmoid",
"sr_antialias": true,
"return_triplane_features": false,
"return_sampling_details_flag": true,
"superresolution_module": "utils.torch_utils.components.NearestConvSR",
"depth_resolution": 64,
"depth_resolution_importance": 64,
"ray_start": "auto",
"ray_end": "auto",
"box_warp": 0.9,
"white_back": true,
"radius_range": [
1.5,
2
],
"sampler_bbox_min": -0.45,
"sampler_bbox_max": 0.45,
"filter_out_of_bbox": true,
"PatchRaySampler": true,
"patch_rendering_resolution": 45,
"z_near": 1.05,
"z_far": 2.45
},
"sr_training": false,
"bcg_synthesis": false,
"bcg_synthesis_kwargs": {},
"patch_rendering_resolution": 45,
"vit_decoder_lr": 1e-05,
"vit_decoder_wd": 0.001,
"ae_classname": "vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder",
"color_criterion": "mse",
"l2_lambda": 1.0,
"lpips_lambda": 0.8,
"lpips_delay_iter": 0,
"sr_delay_iter": 0,
"kl_anneal": false,
"latent_lambda": 0.0,
"latent_criterion": "mse",
"kl_lambda": 0.0,
"ssim_lambda": 0.0,
"l1_lambda": 0.0,
"id_lambda": 0.0,
"depth_lambda": 0.0,
"alpha_lambda": 1.0,
"fg_mse": false,
"bg_lamdba": 0.01,
"density_reg_every": 4,
"shape_uniform_lambda": 0.005,
"shape_importance_lambda": 0.01,
"shape_depth_lambda": 0.0,
"rec_cvD_lambda": 0.01,
"nvs_cvD_lambda": 0.025,
"patchgan_disc_factor": 0.01,
"patchgan_disc_g_weight": 0.2,
"r1_gamma": 1.0,
"sds_lamdba": 1.0,
"nvs_D_lr_mul": 1,
"cano_D_lr_mul": 1,
"ce_balanced_kl": 1.0,
"p_eps_lambda": 1,
"symmetry_loss": false,
"depth_smoothness_lambda": 0.0,
"ce_lambda": 0.5,
"negative_entropy_lambda": 0.5,
"grad_clip": true,
"online_mask": false,
"sde_time_eps": 0.01,
"sde_beta_start": 0.1,
"sde_beta_end": 20.0,
"sde_sde_type": "vpsde",
"sde_sigma2_0": 0.0,
"iw_sample_p": "drop_sigma2t_iw",
"iw_sample_q": "ll_iw",
"iw_subvp_like_vp_sde": false,
"train_vae": false,
"pred_type": "v",
"p_rendering_loss": false,
"unfix_logit": false,
"loss_type": "eps",
"loss_weight": "simple",
"diffusion_ce_anneal": true,
"enable_mixing_normal": false,
"only_mid_control": false,
"control_key": "img",
"normalize_clip_encoding": true,
"scale_clip_encoding": 1.0,
"cfg_dropout_prob": 0.1,
"use_lmdb": false,
"use_wds": false,
"use_lmdb_compressed": false,
"compile": false,
"objv_dataset": true,
"decode_encode_img_only": false,
"load_wds_diff": true,
"load_wds_latent": false,
"eval_load_wds_instance": true,
"shards_lst": "",
"eval_shards_lst": "",
"mv_input": true,
"duplicate_sample": true,
"orthog_duplicate": false,
"split_chunk_input": false,
"load_real": true,
"four_view_for_latent": false,
"single_view_for_i23d": false,
"shuffle_across_cls": true,
"load_extra_36_view": false,
"mv_latent_dir": "",
"append_depth": false,
"plucker_embedding": true,
"gs_cam_format": false,
"split_chunk_size": 8,
"path_type": "Linear",
"prediction": "velocity",
"sample_eps": null,
"train_eps": null,
"snr_type": "lognorm",
"local_rank": 0,
"gpus": 1
}