# PartRAG / params.yaml
# michaelpopo: Initial clean PartRAG release (commit 1ee07b0, verified)
---
# Model section: pretrained checkpoint location plus VAE / transformer options.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm the vae/transformer grouping against the code that reads it.
model:
  pretrained_model_name_or_path: '/root/autodl-tmp/PartRAG/pretrained_weights/PartRAG'
  vae:
    num_tokens: 1024
  transformer:
    enable_local_cross_attn: true
    # Even-numbered block indices from 0 through 20.
    global_attn_block_ids: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
    global_attn_block_id_range: null
# Dataset section: config file list, split ratio, part-count limits, and
# sampling / augmentation ratios.
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
# Keeping these keys under `dataset` stops min_num_parts / max_num_parts from
# colliding with the identically named keys in the `val` section.
dataset:
  config:
    - '/root/autodl-tmp/dataset/Objaverse/processed/high_quality_object_part_configs_FIXED.json'
  training_ratio: 0.9
  min_num_parts: 2
  max_num_parts: 8
  max_iou_mean: 0.5
  max_iou_max: 0.5
  shuffle_parts: true
  object_ratio: 0.5
  rotating_ratio: 0.3
  # NOTE(review): key is spelled "ratating_degree" (sic). Left unchanged
  # because the consumer presumably looks it up by this exact name; rename
  # only together with the reading code.
  ratating_degree: 15
# Optimizer section.
# Floats in scientific notation carry an explicit mantissa dot and a signed
# exponent so that YAML 1.1 loaders (e.g. PyYAML) resolve them as floats
# instead of strings.
optimizer:
  name: "adamw"
  lr: 3.0e-5
  betas: [0.9, 0.999]
  weight_decay: 0.01
  eps: 1.0e-8
# Learning-rate scheduler section.
# Nested under its own key so that `name` does not duplicate the optimizer's
# `name` key.
lr_scheduler:
  name: "cosine_warmup"
  num_warmup_steps: 300
# Retrieval section: database location and which modalities are used.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# `retrieval` is assumed to be a top-level section. Confirm against the
# consumer.
retrieval:
  database_path: '/root/autodl-tmp/retrieval_database_high_quality'
  enabled: true
  top_k: 3
  use_image: true
  use_mesh: true
# Training section: batch/epoch settings, loss weighting, logging / eval / save
# cadence, EMA, contrastive loss, module freezing, and per-group learning rates.
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
# Keeping these keys under `train` stops batch_size_per_gpu from colliding
# with the identically named key in the `val` section.
train:
  batch_size_per_gpu: 48
  epochs: 350
  grad_checkpoint: true
  weighting_scheme: "logit_normal"
  logit_mean: 0.0
  logit_std: 1.0
  mode_scale: 1.29
  cfg_dropout_prob: 0.1
  training_objective: "-v"
  log_freq: 10
  early_eval_freq: 500
  early_eval: 1000
  eval_freq: 1000
  save_freq: 1000
  eval_freq_epoch: 5
  save_freq_epoch: 1
  # NOTE(review): the four keys below are assumed to form the ema_kwargs
  # mapping and everything after them to belong to `train`; confirm against
  # the training script.
  ema_kwargs:
    decay: 0.9999
    use_ema_warmup: true
    inv_gamma: 1.0
    power: 0.75
  use_part_dataset: true
  enable_contrastive: true
  contrastive_object_weight: 0.03
  contrastive_part_weight: 0.03
  contrastive_temperature: 0.07
  # Bidirectional momentum queue (paper setting)
  use_momentum_queue: true
  momentum_coefficient: 0.999
  momentum_queue_size: 65536
  freeze_pretrained_backbone: true
  # Module-name patterns; kept quoted because they contain `*` and `[`.
  # How they are matched is decided by the training code (not shown here).
  freeze_modules:
    - "pos_embed"
    - "time_embed"
    - "part_embedding"
    - "proj_in"
    - "blocks.0.attn1*"
    - "blocks.0.ff*"
    - "blocks.0.norm1"
    - "blocks.1.attn1*"
    - "blocks.1.ff*"
    - "blocks.1.norm1"
  trainable_modules:
    - "blocks.*.attn2*"
    - "blocks.*.norm2"
    - "blocks.[2-9].*"
    - "blocks.1[0-9].*"
    - "blocks.20.*"
    - "proj_out"
  use_differential_lr: true
  # Floats in scientific notation carry an explicit mantissa dot so that
  # YAML 1.1 loaders (e.g. PyYAML) resolve them as floats instead of strings.
  frozen_modules_lr: 0.0
  pretrained_modules_lr: 1.0e-6
  new_modules_lr: 1.0e-5
  projection_modules_lr: 1.0e-5
# Validation section: batch size, part-count limits, inference steps,
# rendering parameters, and metric settings.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm the rendering/metric grouping against the evaluation code.
val:
  batch_size_per_gpu: 1
  nrow: 4
  min_num_parts: 2
  max_num_parts: 8
  num_inference_steps: 50
  # Written as 1.0e+8 (mantissa dot, signed exponent) so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float instead of the string "1e8".
  max_num_expanded_coords: 1.0e+8
  use_flash_decoder: false
  rendering:
    radius: 4.0
    num_views: 36
    fps: 18
  metric:
    cd_num_samples: 204800
    cd_metric: "l2"
    f1_score_threshold: 0.1
    # Same float-safe notation as max_num_expanded_coords above.
    default_cd: 1.0e+6
    default_f1: 0.0