---
# Training / evaluation configuration.
#
# NOTE(review): the block structure below was reconstructed from a copy of
# this file whose line breaks and indentation had been lost. Nesting that
# could not be determined unambiguously is flagged inline; confirm against
# the code that loads this config.
#
# Paths under /root/autodl-tmp/ are absolute and machine-specific.
#
# Floats in scientific notation are written with a decimal point and a signed
# exponent (e.g. 3.0e-5) so that YAML 1.1 loaders also resolve them as floats
# rather than strings.

model:
  pretrained_model_name_or_path: '/root/autodl-tmp/PartRAG/pretrained_weights/PartRAG'
  vae:
    num_tokens: 1024
  transformer:
    enable_local_cross_attn: true
    global_attn_block_ids: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
    global_attn_block_id_range: null

dataset:
  config:
    - '/root/autodl-tmp/dataset/Objaverse/processed/high_quality_object_part_configs_FIXED.json'
  training_ratio: 0.9
  min_num_parts: 2
  max_num_parts: 8
  max_iou_mean: 0.5
  max_iou_max: 0.5
  shuffle_parts: true
  object_ratio: 0.5
  rotating_ratio: 0.3
  # NOTE(review): key is spelled "ratating" (not "rotating"). Kept as-is
  # because the loader presumably looks it up by this exact name; rename only
  # together with the consuming code.
  ratating_degree: 15

optimizer:
  name: 'adamw'
  lr: 3.0e-5
  betas: [0.9, 0.999]
  weight_decay: 0.01
  eps: 1.0e-8

lr_scheduler:
  name: 'cosine_warmup'
  num_warmup_steps: 300

# NOTE(review): assumed to be a top-level section (sibling of `train`), not a
# child of `lr_scheduler` -- confirm against the loader.
retrieval:
  database_path: '/root/autodl-tmp/retrieval_database_high_quality'
  enabled: true
  top_k: 3
  use_image: true
  use_mesh: true

train:
  batch_size_per_gpu: 48
  epochs: 350
  grad_checkpoint: true
  weighting_scheme: 'logit_normal'
  logit_mean: 0.0
  logit_std: 1.0
  mode_scale: 1.29
  cfg_dropout_prob: 0.1
  training_objective: '-v'
  log_freq: 10
  early_eval_freq: 500
  early_eval: 1000
  eval_freq: 1000
  save_freq: 1000
  eval_freq_epoch: 5
  save_freq_epoch: 1
  ema_kwargs:
    decay: 0.9999
    use_ema_warmup: true
    inv_gamma: 1.0
    power: 0.75

  # NOTE(review): the keys from here to the end of `train` are assumed to be
  # direct children of `train` (not of `ema_kwargs`) -- confirm.
  use_part_dataset: true
  enable_contrastive: true
  contrastive_object_weight: 0.03
  contrastive_part_weight: 0.03
  contrastive_temperature: 0.07

  # Bidirectional momentum queue (paper setting)
  use_momentum_queue: true
  momentum_coefficient: 0.999
  momentum_queue_size: 65536

  # Module-name patterns. They must stay quoted: an unquoted value containing
  # a leading `*` would be parsed as a YAML alias, and `[`...`]` as a flow
  # sequence.
  freeze_pretrained_backbone: true
  freeze_modules:
    - 'pos_embed'
    - 'time_embed'
    - 'part_embedding'
    - 'proj_in'
    - 'blocks.0.attn1*'
    - 'blocks.0.ff*'
    - 'blocks.0.norm1'
    - 'blocks.1.attn1*'
    - 'blocks.1.ff*'
    - 'blocks.1.norm1'
  trainable_modules:
    - 'blocks.*.attn2*'
    - 'blocks.*.norm2'
    - 'blocks.[2-9].*'
    - 'blocks.1[0-9].*'
    - 'blocks.20.*'
    - 'proj_out'

  # Per-group learning rates; these sit alongside the single `optimizer.lr`
  # above. How the two interact is decided by the training code.
  use_differential_lr: true
  frozen_modules_lr: 0.0
  pretrained_modules_lr: 1.0e-6
  new_modules_lr: 1.0e-5
  projection_modules_lr: 1.0e-5

val:
  batch_size_per_gpu: 1
  nrow: 4
  min_num_parts: 2
  max_num_parts: 8
  num_inference_steps: 50
  # Written as a float (same value as the original 1e8); the consumer is
  # presumably responsible for any integer conversion.
  max_num_expanded_coords: 1.0e+8
  use_flash_decoder: false
  rendering:
    radius: 4.0
    num_views: 36
    fps: 18
  metric:
    cd_num_samples: 204800
    cd_metric: 'l2'
    f1_score_threshold: 0.1
    default_cd: 1.0e+6
    default_f1: 0.0