---
# Configuration with three top-level entries: `unet_additional_kwargs`,
# `noise_scheduler_kwargs`, and `sampler`.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file (indentation lost). Confirm the parent of each key against the
# code that loads this config.

# Presumably passed as extra keyword arguments when building the UNet —
# TODO confirm against the loader.
unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  # NOTE(review): assumes every key from here through
  # `temporal_attention_dim_div` belongs to `motion_module_kwargs` — confirm.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1

# Presumably keyword arguments for the noise scheduler selected by `sampler`
# below — TODO confirm against the loader.
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "scaled_linear"
  clip_sample: false
  steps_offset: 1

  # NOTE(review): these three look like the zero-terminal-SNR scheduler
  # settings and are assumed to belong to `noise_scheduler_kwargs` — confirm.
  prediction_type: "v_prediction"
  rescale_betas_zero_snr: true
  timestep_spacing: "trailing"

sampler: DDIM