Spaces:

yslan
/

LN3Diff_I23D

Running on Zero

NIRVANALAN

update

127ac36 3 months ago

7.49 kB

	{
	"image_size_encoder": 256,
	"triplane_scaling_divider": 0.96806,
	"diffusion_input_size": 32,
	"trainer_name": "flow_matching",
	"use_amp": true,
	"clip_denoised": false,
	"num_samples": 1,
	"num_instances": 10,
	"use_ddim": false,
	"ddpm_model_path": "",
	"cldm_model_path": "",
	"rec_model_path": "",
	"logdir": "./",
	"data_dir": "NONE",
	"eval_data_dir": "/cpfs01/user/lanyushi.p/Repo/eccv24/open-source/InstantMesh/test_dir",
	"eval_batch_size": 1,
	"num_workers": 0,
	"overfitting": false,
	"image_size": 256,
	"iterations": 5000001,
	"schedule_sampler": "uniform",
	"anneal_lr": false,
	"lr": 2e-05,
	"weight_decay": 0.05,
	"lr_anneal_steps": 0,
	"batch_size": 1,
	"microbatch": 1,
	"ema_rate": "0.9999",
	"log_interval": 50,
	"eval_interval": 5000,
	"save_interval": 10000,
	"resume_checkpoint": "checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors",
	"resume_cldm_checkpoint": "",
	"resume_checkpoint_EG3D": "",
	"use_fp16": false,
	"fp16_scale_growth": 0.001,
	"load_submodule_name": "",
	"ignore_resume_opt": false,
	"freeze_ae": false,
	"denoised_ae": true,
	"prompt": "a red chair",
	"interval": 5,
	"save_img": false,
	"use_train_trajectory": false,
	"unconditional_guidance_scale": 6.5,
	"use_eos_feature": false,
	"export_mesh": false,
	"cond_key": "img",
	"allow_tf32": true,
	"num_channels": 320,
	"num_res_blocks": 2,
	"num_heads": 8,
	"num_heads_upsample": -1,
	"num_head_channels": -1,
	"attention_resolutions": "4,2,1",
	"channel_mult": "",
	"dropout": 0.0,
	"class_cond": false,
	"use_checkpoint": false,
	"use_scale_shift_norm": true,
	"resblock_updown": false,
	"use_new_attention_order": false,
	"denoise_in_channels": 4,
	"denoise_out_channels": 4,
	"create_controlnet": false,
	"create_dit": true,
	"i23d": true,
	"create_unet_with_hint": false,
	"dit_model_arch": "DiT-PixArt-L/2",
	"use_spatial_transformer": true,
	"transformer_depth": 1,
	"context_dim": 1024,
	"pooling_ctx_dim": 768,
	"roll_out": true,
	"n_embed": null,
	"legacy": true,
	"mixing_logit_init": -6,
	"hint_channels": 3,
	"learn_sigma": false,
	"diffusion_steps": 1000,
	"noise_schedule": "linear",
	"standarization_xt": false,
	"timestep_respacing": "",
	"use_kl": false,
	"predict_xstart": false,
	"predict_v": true,
	"rescale_timesteps": false,
	"rescale_learned_sigmas": false,
	"mixed_prediction": false,
	"dino_version": "mv-sd-dit-dynaInp-trilatent",
	"encoder_in_channels": 10,
	"img_size": [
	256
	],
	"patch_size": 14,
	"in_chans": 384,
	"num_classes": 0,
	"embed_dim": 384,
	"depth": 6,
	"mlp_ratio": 4.0,
	"qkv_bias": false,
	"qk_scale": null,
	"drop_rate": 0.1,
	"attn_drop_rate": 0.0,
	"drop_path_rate": 0.0,
	"norm_layer": "nn.LayerNorm",
	"cls_token": false,
	"encoder_cls_token": false,
	"decoder_cls_token": false,
	"sr_kwargs": {},
	"sr_ratio": 2,
	"use_clip": false,
	"arch_encoder": "vits",
	"arch_decoder": "vitb",
	"load_pretrain_encoder": false,
	"encoder_lr": 1e-05,
	"encoder_weight_decay": 0.001,
	"no_dim_up_mlp": true,
	"dim_up_mlp_as_func": false,
	"decoder_load_pretrained": false,
	"uvit_skip_encoder": true,
	"vae_p": 2,
	"ldm_z_channels": 4,
	"ldm_embed_dim": 4,
	"use_conf_map": false,
	"sd_E_ch": 64,
	"z_channels": 12,
	"sd_E_num_res_blocks": 1,
	"num_frames": 6,
	"arch_dit_decoder": "DiT2-L/2",
	"return_all_dit_layers": false,
	"lrm_decoder": false,
	"plane_n": 3,
	"gs_rendering": false,
	"decomposed": true,
	"triplane_fg_bg": false,
	"cfg": "objverse_tuneray_aug_resolution_64_64_auto",
	"density_reg": 0.0,
	"density_reg_p_dist": 0.004,
	"reg_type": "l1",
	"triplane_decoder_lr": 5e-05,
	"super_resolution_lr": 5e-05,
	"c_scale": 1,
	"nsr_lr": 0.02,
	"triplane_size": 224,
	"decoder_in_chans": 32,
	"triplane_in_chans": 32,
	"decoder_output_dim": 3,
	"out_chans": 96,
	"c_dim": 25,
	"ray_start": 0.6,
	"ray_end": 1.8,
	"rendering_kwargs": {
	"image_resolution": 256,
	"disparity_space_sampling": false,
	"clamp_mode": "softplus",
	"c_gen_conditioning_zero": true,
	"c_scale": 1,
	"superresolution_noise_mode": "none",
	"density_reg": 0.0,
	"density_reg_p_dist": 0.004,
	"reg_type": "l1",
	"decoder_lr_mul": 1,
	"decoder_activation": "sigmoid",
	"sr_antialias": true,
	"return_triplane_features": false,
	"return_sampling_details_flag": true,
	"superresolution_module": "utils.torch_utils.components.NearestConvSR",
	"depth_resolution": 64,
	"depth_resolution_importance": 64,
	"ray_start": "auto",
	"ray_end": "auto",
	"box_warp": 0.9,
	"white_back": true,
	"radius_range": [
	1.5,
	2
	],
	"sampler_bbox_min": -0.45,
	"sampler_bbox_max": 0.45,
	"filter_out_of_bbox": true,
	"PatchRaySampler": true,
	"patch_rendering_resolution": 45,
	"z_near": 1.05,
	"z_far": 2.45
	},
	"sr_training": false,
	"bcg_synthesis": false,
	"bcg_synthesis_kwargs": {},
	"patch_rendering_resolution": 45,
	"vit_decoder_lr": 1e-05,
	"vit_decoder_wd": 0.001,
	"ae_classname": "vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder",
	"color_criterion": "mse",
	"l2_lambda": 1.0,
	"lpips_lambda": 0.8,
	"lpips_delay_iter": 0,
	"sr_delay_iter": 0,
	"kl_anneal": false,
	"latent_lambda": 0.0,
	"latent_criterion": "mse",
	"kl_lambda": 0.0,
	"ssim_lambda": 0.0,
	"l1_lambda": 0.0,
	"id_lambda": 0.0,
	"depth_lambda": 0.0,
	"alpha_lambda": 1.0,
	"fg_mse": false,
	"bg_lamdba": 0.01,
	"density_reg_every": 4,
	"shape_uniform_lambda": 0.005,
	"shape_importance_lambda": 0.01,
	"shape_depth_lambda": 0.0,
	"rec_cvD_lambda": 0.01,
	"nvs_cvD_lambda": 0.025,
	"patchgan_disc_factor": 0.01,
	"patchgan_disc_g_weight": 0.2,
	"r1_gamma": 1.0,
	"sds_lamdba": 1.0,
	"nvs_D_lr_mul": 1,
	"cano_D_lr_mul": 1,
	"ce_balanced_kl": 1.0,
	"p_eps_lambda": 1,
	"symmetry_loss": false,
	"depth_smoothness_lambda": 0.0,
	"ce_lambda": 0.5,
	"negative_entropy_lambda": 0.5,
	"grad_clip": true,
	"online_mask": false,
	"sde_time_eps": 0.01,
	"sde_beta_start": 0.1,
	"sde_beta_end": 20.0,
	"sde_sde_type": "vpsde",
	"sde_sigma2_0": 0.0,
	"iw_sample_p": "drop_sigma2t_iw",
	"iw_sample_q": "ll_iw",
	"iw_subvp_like_vp_sde": false,
	"train_vae": false,
	"pred_type": "v",
	"p_rendering_loss": false,
	"unfix_logit": false,
	"loss_type": "eps",
	"loss_weight": "simple",
	"diffusion_ce_anneal": true,
	"enable_mixing_normal": false,
	"only_mid_control": false,
	"control_key": "img",
	"normalize_clip_encoding": true,
	"scale_clip_encoding": 1.0,
	"cfg_dropout_prob": 0.1,
	"use_lmdb": false,
	"use_wds": false,
	"use_lmdb_compressed": false,
	"compile": false,
	"objv_dataset": true,
	"decode_encode_img_only": false,
	"load_wds_diff": true,
	"load_wds_latent": false,
	"eval_load_wds_instance": true,
	"shards_lst": "",
	"eval_shards_lst": "",
	"mv_input": true,
	"duplicate_sample": true,
	"orthog_duplicate": false,
	"split_chunk_input": false,
	"load_real": true,
	"four_view_for_latent": false,
	"single_view_for_i23d": false,
	"shuffle_across_cls": true,
	"load_extra_36_view": false,
	"mv_latent_dir": "",
	"append_depth": false,
	"plucker_embedding": true,
	"gs_cam_format": false,
	"split_chunk_size": 8,
	"path_type": "Linear",
	"prediction": "velocity",
	"sample_eps": null,
	"train_eps": null,
	"snr_type": "lognorm",
	"local_rank": 0,
	"gpus": 1
	}