# Identity of this configuration: a slash-separated name and a free-text
# description that points at the upstream CraftsMan3D repository.
name: craftsman/craftsman-v1-5
description: 'image to 3d shape diffusion of CraftsMan3D(https://github.com/wyysf-98/CraftsMan3D)'

# NOTE(review): presumably selects which system implementation receives the
# `system` mapping below; confirm against the code that loads this file.
system_type: pixart-diffusion-system
# Options for the system named by `system_type`. The scalar keys directly
# below look like sampling-time settings; the nested mappings that follow each
# configure one sub-component. Meanings marked "presumably" are inferred from
# key names only and should be confirmed in the consuming code.
system:
  # Presumably a multiplier applied to the latents; confirm where it is used.
  z_scale_factor: 1.0
  # Presumably the classifier-free guidance weight used when sampling.
  guidance_scale: 7.5
  # Presumably the number of denoising steps run at inference.
  num_inference_steps: 50
  # NOTE(review): looks like the DDIM `eta` argument; confirm it is forwarded
  # to the scheduler's step call.
  eta: 0.0
  # Name of the mesh-extraction routine; how it is resolved is not visible here.
  extract_mesh_func: diffdmc
  # Shape autoencoder: `shape_model_type` names the implementation and
  # `shape_model` carries its options. The grouping headings below are inferred
  # from key names only; confirm details against the model code.
  shape_model_type: michelangelo-autoencoder
  shape_model:
    # -- input sampling (presumably points drawn per shape) --
    n_samples: 16384
    use_downsample: true
    downsample_ratio: 0.0625  # 1/16
    # Equals `denoiser_model.n_ctx` in this file (both 768); presumably the two
    # must stay in sync - confirm.
    num_latents: 768
    # -- multi-resolution sampling --
    # Switched off here. `resolutions` and `sampling_prob` are parallel
    # three-entry lists; presumably ignored while the switch is false.
    use_multi_reso: false
    resolutions: [4096, 8192, 12288]
    sampling_prob: [0, 0, 1]
    # Equals `denoiser_model.input_channels` / `output_channels` in this file
    # (all 64); presumably the latent channel width - confirm.
    embed_dim: 64
    point_feats: 3
    out_dim: 1
    # -- positional embedding (presumably Fourier features) --
    num_freqs: 8
    include_pi: false
    # -- network size and initialisation --
    heads: 12
    width: 768
    num_encoder_layers: 8
    num_decoder_layers: 16
    use_ln_post: true
    init_scale: 0.25
    qkv_bias: false
    # -- runtime switches (presumably flash attention and gradient
    # checkpointing) --
    use_flash: true
    use_checkpoint: true
  # Conditioning encoder: `condition_model_type` names the implementation and
  # `condition_model` carries its options. The key names mention CLIP and DINO
  # image encoders; everything marked "presumably" is inferred from names only.
  condition_model_type: cond-embedder
  condition_model:
    # Presumably keep the CLIP / DINO modulation parameters frozen in training.
    freeze_modulation_clip: true
    freeze_modulation_dino: true
    # Camera encoding is switched off; `camera_embeds_dim` is presumably unused
    # while `encode_camera` is false - confirm.
    encode_camera: false
    camera_embeds_dim: 32
    # Number of input views per sample (single-view here).
    n_views: 1
    # NOTE(review): presumably the fraction of training samples whose condition
    # is replaced by the empty embedding (for classifier-free guidance).
    empty_embeds_ratio: 0.1
    normalize_embeds: false
    # Presumably makes the unconditional embedding all zeros; confirm.
    zero_uncond_embeds: true
    linear_proj_init: constant
    # Input image side length for each encoder - presumably pixels; confirm.
    image_size_dino: 224
    image_size_clip: 224
  # Diffusion denoiser: `denoiser_model_type` names the implementation and
  # `denoiser_model` carries its options.
  denoiser_model_type: pixart-denoiser
  denoiser_model:
    # Both channel counts equal `shape_model.embed_dim` in this file (64);
    # presumably they must match the autoencoder's latent width - confirm.
    input_channels: 64
    output_channels: 64
    # Equals `shape_model.num_latents` in this file (768); presumably the
    # latent sequence length - confirm.
    n_ctx: 768
    width: 1024
    layers: 32
    heads: 16
    # NOTE(review): presumably the width of the conditioning embeddings fed to
    # the denoiser; verify against the condition model's output size.
    context_dim: 1024
    init_scale: 1.0
    # Presumably enables gradient checkpointing; confirm in the model code.
    use_checkpoint: true
  # Scheduler given as a dotted path into the `diffusers` package, followed by
  # options whose names match that class's documented constructor arguments.
  # Presumably the training-time noising schedule; confirm in the system code.
  noise_scheduler_type: 'diffusers.schedulers.DDPMScheduler'
  noise_scheduler:
    num_train_timesteps: 1000
    # Timestep count, beta range and schedule are identical to
    # `denoise_scheduler` in this file.
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: 'scaled_linear'
    variance_type: 'fixed_small'
    # Clipping of the predicted sample is disabled.
    clip_sample: false
  # Scheduler given as a dotted path into the `diffusers` package, followed by
  # options whose names match that class's documented constructor arguments.
  # Presumably the sampler used at inference; confirm in the system code.
  denoise_scheduler_type: 'diffusers.schedulers.DDIMScheduler'
  denoise_scheduler:
    # Timestep count, beta range and schedule are identical to
    # `noise_scheduler` in this file.
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: 'scaled_linear'
    # Disabled: the predicted sample is NOT clipped (when enabled, diffusers
    # clips it to [-1, 1] by default).
    clip_sample: false
    # Per the diffusers docs: when false, the final step uses the alpha product
    # of step 0 instead of fixing the previous alpha product to 1.
    set_alpha_to_one: false
    # Per the diffusers docs: offset added to the inference timesteps.
    steps_offset: 1
  # Empty string here; presumably a path to a JSON file listing validation
  # samples, with "" meaning none - confirm how the consumer treats it.
  val_samples_json: ""
  # Experiment logging. The wandb logger is disabled in this file; `project`
  # and `name` are presumably the W&B project and run names used when enabled.
  loggers:
    wandb:
      enable: false
      project: CraftsMan
      name: image-to-shape-diffusion
  # Training loss settings.
  loss:
    # Presumably mean-squared error on the denoiser's prediction; confirm.
    loss_type: mse
    # Presumably the weight multiplying the diffusion loss term; confirm.
    lambda_diffusion: 1.0
  # Optimizer selection: `name` picks the optimizer and `args` holds its
  # keyword arguments. The keys match torch.optim.AdamW's parameters; how the
  # trainer resolves `name` is not visible here.
  optimizer:
    name: AdamW
    args:
      lr: 0.0002
      betas: [0.9, 0.99]
      eps: 1.0e-06
  # Learning-rate schedule: `name` picks the scheduler and `args` holds its
  # keyword arguments. The keys match torch.optim.lr_scheduler.CosineAnnealingLR.
  scheduler:
    name: CosineAnnealingLR
    args:
      # NOTE(review): in PyTorch, T_max is counted in scheduler steps; whether
      # the trainer steps per epoch or per iteration is not visible here, so
      # confirm that 5 is the intended period.
      T_max: 5
      # Lower bound of the learning rate over the cosine cycle.
      eta_min: 1.0e-06