---
# Keyword-argument groups for the UNet and the noise scheduler.
#
# NOTE(review): this file previously had a trailing `|` on every line and no
# indentation, so it loaded as a single string instead of a mapping. The
# nesting below was restored from the key names and their order; confirm it
# against the code that consumes this file.

# Extra keyword arguments for the UNet.
unet_additional_kwargs:
  use_pixel_encoder: false
  use_img_encoder: false
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  use_motion_resnet: false
  # NOTE(review): presumably the resolution levels that receive a motion
  # module; verify against the model code.
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  # NOTE(review): assumed to be nested under unet_additional_kwargs and to
  # own every key up to temporal_attention_dim_div; confirm.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    # The same block type is listed twice on purpose; both entries are kept.
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_attention_dim_div: 1

# Keyword arguments for the noise scheduler.
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  original_inference_steps: 100
  steps_offset: 1