File size: 2,500 Bytes
1ee07b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
model:
  # Absolute, host-specific path; adjust when running on another machine.
  pretrained_model_name_or_path: "/root/autodl-tmp/PartRAG/pretrained_weights/PartRAG"
  vae:
    num_tokens: 1024
  transformer:
    enable_local_cross_attn: true
    # Every even block index from 0 through 20 (11 entries).
    # NOTE(review): presumably the blocks that use global attention -
    # confirm against the transformer implementation.
    global_attn_block_ids:
      - 0
      - 2
      - 4
      - 6
      - 8
      - 10
      - 12
      - 14
      - 16
      - 18
      - 20
    # Explicitly null; only the explicit id list above is populated.
    global_attn_block_id_range: null

dataset:
  # One-element list holding an absolute, host-specific JSON path.
  config:
    - "/root/autodl-tmp/dataset/Objaverse/processed/high_quality_object_part_configs_FIXED.json"
  training_ratio: 0.9
  # Same 2..8 part-count bounds as the val section of this file.
  min_num_parts: 2
  max_num_parts: 8
  # Both IoU limits are set to the same value.
  max_iou_mean: 0.5
  max_iou_max: 0.5
  shuffle_parts: true
  object_ratio: 0.5
  rotating_ratio: 0.3
  # NOTE(review): "ratating" looks like a misspelling of "rotating", but the
  # key is kept as-is because the consumer may look it up under this exact
  # name - confirm against the dataset loader before renaming.
  ratating_degree: 15

optimizer:
  name: "adamw"
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a dot-less literal like 3e-5 as a string, not a float.
  lr: 3.0e-5
  betas: [0.9, 0.999]
  weight_decay: 0.01
  # Same value as before (1.e-8), spelled in the same form as lr.
  eps: 1.0e-8

lr_scheduler:
  # Scheduler identifier string; the warmup count below is the only other
  # setting given in this section.
  name: "cosine_warmup"
  # NOTE(review): presumably counted in optimizer steps - confirm against
  # the code that builds the scheduler.
  num_warmup_steps: 300

retrieval:
  # Absolute, host-specific path under the same /root/autodl-tmp prefix as
  # the other paths in this file.
  database_path: "/root/autodl-tmp/retrieval_database_high_quality"
  enabled: true
  top_k: 3
  # Both the image and the mesh flags are switched on.
  use_image: true
  use_mesh: true

train:
  batch_size_per_gpu: 48
  epochs: 350
  grad_checkpoint: true
  # Weighting scheme name followed by its numeric parameters.
  weighting_scheme: "logit_normal"
  logit_mean: 0.0
  logit_std: 1.0
  mode_scale: 1.29
  cfg_dropout_prob: 0.1
  # Kept quoted so the leading dash is unambiguously part of a string value.
  training_objective: "-v"
  # Logging / evaluation / checkpoint cadence.
  # NOTE(review): the *_epoch keys presumably count epochs and the others
  # count steps - confirm against the training loop.
  log_freq: 10
  early_eval_freq: 500
  early_eval: 1000
  eval_freq: 1000
  save_freq: 1000
  eval_freq_epoch: 5
  save_freq_epoch: 1
  # Keyword arguments for the EMA helper (name suggests they are forwarded
  # as-is - confirm against the trainer).
  ema_kwargs:
    decay: 0.9999
    use_ema_warmup: true
    inv_gamma: 1.0
    power: 0.75

  use_part_dataset: true
  # Contrastive settings: an on/off flag, one weight for the object term and
  # one for the part term (both 0.03 here), and a temperature.
  # NOTE(review): presumably the weights scale auxiliary loss terms added to
  # the main objective - confirm against the training code.
  enable_contrastive: true
  contrastive_object_weight: 0.03
  contrastive_part_weight: 0.03
  contrastive_temperature: 0.07

  # Bidirectional momentum queue (paper setting)
  # The queue size below equals 2^16.
  # NOTE(review): momentum_coefficient is presumably the momentum update
  # rate for the key encoder - confirm against the implementation.
  use_momentum_queue: true
  momentum_coefficient: 0.999
  momentum_queue_size: 65536

  freeze_pretrained_backbone: true
  # Name patterns to freeze: the embedding/input-projection entries plus the
  # attn1, ff and norm1 entries of blocks 0 and 1.
  # NOTE(review): the patterns look like glob/fnmatch expressions; the exact
  # matching rules and the precedence between freeze_modules and
  # trainable_modules are not visible here - confirm against the trainer.
  freeze_modules:
    - "pos_embed"
    - "time_embed"
    - "part_embedding"
    - "proj_in"
    - "blocks.0.attn1*"
    - "blocks.0.ff*"
    - "blocks.0.norm1"
    - "blocks.1.attn1*"
    - "blocks.1.ff*"
    - "blocks.1.norm1"
  # Name patterns to keep trainable: attn2/norm2 in every block, everything
  # in blocks 2-9, 10-19 and 20 (if read as glob character classes), and
  # proj_out.
  trainable_modules:
    - "blocks.*.attn2*"
    - "blocks.*.norm2"
    - "blocks.[2-9].*"
    - "blocks.1[0-9].*"
    - "blocks.20.*"
    - "proj_out"

  # Per-group learning rates. Exponent literals carry an explicit mantissa
  # dot because YAML 1.1 loaders such as PyYAML resolve dot-less forms like
  # 1e-6 as strings, not floats.
  # NOTE(review): how modules are assigned to the frozen / pretrained / new /
  # projection groups is decided by the trainer, not by this file - confirm.
  use_differential_lr: true
  frozen_modules_lr: 0.0
  pretrained_modules_lr: 1.0e-6
  new_modules_lr: 1.0e-5
  projection_modules_lr: 1.0e-5

val:
  batch_size_per_gpu: 1
  nrow: 4
  # Same 2..8 part-count bounds as the dataset section of this file.
  min_num_parts: 2
  max_num_parts: 8
  num_inference_steps: 50
  # Kept as a float, with an explicit mantissa dot and exponent sign: YAML 1.1
  # loaders such as PyYAML resolve the bare form 1e8 as a string.
  max_num_expanded_coords: 1.0e+8
  use_flash_decoder: false
  # Settings for rendering validation outputs.
  rendering:
    radius: 4.0
    num_views: 36
    fps: 18
  # Evaluation metric settings.
  metric:
    cd_num_samples: 204800
    cd_metric: "l2"
    f1_score_threshold: 0.1
    # NOTE(review): default_cd / default_f1 are presumably fallback values
    # used when a metric cannot be computed - confirm against the evaluator.
    # default_cd carries an explicit mantissa dot and exponent sign so that
    # YAML 1.1 loaders such as PyYAML read it as a float, not the string "1e6".
    default_cd: 1.0e+6
    default_f1: 0.0