QingyuShi committed on
Commit
5d8c7a5
·
verified ·
1 Parent(s): 1293b32

Upload files excluding .pt

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. baseline/config.resolved.json +126 -0
  2. baseline/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_141611.txt +12 -0
  3. baseline/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_142846.txt +16 -0
  4. baseline/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_163636.txt +12 -0
  5. baseline/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_164909.txt +16 -0
  6. baseline/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_185706.txt +12 -0
  7. baseline/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_190941.txt +16 -0
  8. baseline/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211749.txt +12 -0
  9. baseline/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213021.txt +16 -0
  10. baseline/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_233832.txt +12 -0
  11. baseline/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_235105.txt +16 -0
  12. baseline/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_015904.txt +12 -0
  13. baseline/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_021139.txt +16 -0
  14. baseline/logs/log.txt +0 -0
  15. baseline/training_metrics.json +0 -0
  16. baseline_dinov3_uf/config.resolved.json +127 -0
  17. baseline_dinov3_uf/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_150031.txt +12 -0
  18. baseline_dinov3_uf/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_151306.txt +16 -0
  19. baseline_dinov3_uf/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_170639.txt +12 -0
  20. baseline_dinov3_uf/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_171913.txt +16 -0
  21. baseline_dinov3_uf/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_191303.txt +12 -0
  22. baseline_dinov3_uf/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_192539.txt +16 -0
  23. baseline_dinov3_uf/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211926.txt +12 -0
  24. baseline_dinov3_uf/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213159.txt +16 -0
  25. baseline_dinov3_uf/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_232551.txt +12 -0
  26. baseline_dinov3_uf/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_233826.txt +16 -0
  27. baseline_dinov3_uf/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_013229.txt +12 -0
  28. baseline_dinov3_uf/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_014506.txt +16 -0
  29. baseline_dinov3_uf/logs/log.txt +0 -0
  30. baseline_dinov3_uf/training_metrics.json +0 -0
  31. rec_only_dinov3/config.resolved.json +127 -0
  32. rec_only_dinov3/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_110622.txt +16 -0
  33. rec_only_dinov3/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_124235.txt +16 -0
  34. rec_only_dinov3/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_141847.txt +16 -0
  35. rec_only_dinov3/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_155459.txt +16 -0
  36. rec_only_dinov3/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_173111.txt +16 -0
  37. rec_only_dinov3/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_190723.txt +16 -0
  38. rec_only_dinov3/eval_results/eval_0007_epoch_0070_reconstruction_step_00087570_20260528_204340.txt +16 -0
  39. rec_only_dinov3/eval_results/eval_0008_epoch_0080_reconstruction_step_00100080_20260528_221953.txt +16 -0
  40. rec_only_dinov3/eval_results/eval_0009_epoch_0090_reconstruction_step_00112590_20260528_235612.txt +16 -0
  41. rec_only_dinov3/eval_results/eval_0010_epoch_0100_reconstruction_step_00125100_20260529_013235.txt +16 -0
  42. rec_only_dinov3/logs/log.txt +0 -0
  43. rec_only_dinov3/training_metrics.json +0 -0
  44. rec_only_dinov3_uf/config.resolved.json +127 -0
  45. rec_only_dinov3_uf/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_134902.txt +16 -0
  46. rec_only_dinov3_uf/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_153954.txt +16 -0
  47. rec_only_dinov3_uf/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_173108.txt +16 -0
  48. rec_only_dinov3_uf/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_192208.txt +16 -0
  49. rec_only_dinov3_uf/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_211317.txt +16 -0
  50. rec_only_dinov3_uf/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_230434.txt +16 -0
baseline/config.resolved.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "output_dir": "work_dirs/baseline",
4
+ "model": {
5
+ "img_size": 256,
6
+ "input_range": "minus_one_one",
7
+ "num_classes": 1000,
8
+ "encoder_type": "rectok",
9
+ "encoder_model_size": "base",
10
+ "encoder_patch_size": 16,
11
+ "token_channels": 128,
12
+ "mask_ratio": 0.4,
13
+ "mask_ratio_min": -0.1,
14
+ "mask_ratio_type": "random",
15
+ "use_qknorm_encoder": false,
16
+ "latent_hw": 16,
17
+ "decoder_model": "JiTCoT-B/16",
18
+ "decoder_patch_size": 16,
19
+ "bottleneck_dim_latent": 128,
20
+ "dh_depth": 2,
21
+ "dh_hidden_size": 1024,
22
+ "attn_dropout": 0.0,
23
+ "proj_dropout": 0.0,
24
+ "enable_ema": true,
25
+ "ema_decay1": 0.9999,
26
+ "ema_decay2": 0.9998,
27
+ "label_drop_prob": 0.1,
28
+ "P_mean": -0.4,
29
+ "P_std": 0.8,
30
+ "latent_mean": -1.2,
31
+ "latent_std": 1.0,
32
+ "latent_weight": 1.0,
33
+ "choose_latent_p": 0.4,
34
+ "perceptual_weight": 1.0,
35
+ "perceptual_net": "lpips-convnext_s-1.0-0.1",
36
+ "sample_mode": "latent_first_cascaded_noised",
37
+ "latent_max_t": 1.0,
38
+ "latent_pixel_offset": 0.0,
39
+ "latent_pixel_shift": 1.0,
40
+ "t_eps": 0.05,
41
+ "t_eps_inference": 0.05,
42
+ "noise_scale": 1.0,
43
+ "sampling_method": "heun",
44
+ "num_sampling_steps": 50,
45
+ "cfg": 1.0,
46
+ "cfg_latent": 1.0,
47
+ "interval_min": 0.0,
48
+ "interval_max": 1.0,
49
+ "interval_min_latent": 0.0,
50
+ "interval_max_latent": 1.0,
51
+ "gen_shift_pixel": 1.0,
52
+ "gen_shift_latent": 1.0,
53
+ "guidance_method": "cfg"
54
+ },
55
+ "data": {
56
+ "train_dir": "data/imagenet/train",
57
+ "val_dir": "data/imagenet/val",
58
+ "num_workers": 8,
59
+ "pin_memory": true,
60
+ "persistent_workers": true
61
+ },
62
+ "train": {
63
+ "epochs": 200,
64
+ "global_batch_size": 1024,
65
+ "eval_global_batch_size": 1024,
66
+ "grad_accum_steps": 1,
67
+ "grad_clip": 3.0,
68
+ "amp_dtype": "bf16",
69
+ "log_interval": 50
70
+ },
71
+ "visualization": {
72
+ "initial_visualization": true,
73
+ "vis_interval": 100,
74
+ "visualize_reconstruction": true,
75
+ "visualize_generation": true
76
+ },
77
+ "eval": {
78
+ "initial_eval": {
79
+ "reconstruction": false,
80
+ "generation": false
81
+ },
82
+ "gfid_interval": 10,
83
+ "rfid_interval": 10,
84
+ "gfid_stats_path": "fid_stats/jit_in256_stats.npz",
85
+ "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
86
+ "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
87
+ "gfid_backend": "online",
88
+ "gfid_num_classes": 1000,
89
+ "gfid_num_images": 50000,
90
+ "rfid_num_images": 50000,
91
+ "batch_size": 64,
92
+ "num_workers": 8,
93
+ "gfid_metric_verbose": false,
94
+ "gfid_keep_images": false,
95
+ "gfid_cfg_scale": null,
96
+ "gfid_cfg_scale_latent": null,
97
+ "gfid_cfg_interval": null,
98
+ "gfid_cfg_interval_latent": null,
99
+ "gfid_steps": null,
100
+ "eval_ema": "1"
101
+ },
102
+ "optim": {
103
+ "name": "adamw",
104
+ "lr": 0.0001,
105
+ "lr_schedule": "constant",
106
+ "weight_decay": 0.0,
107
+ "betas": [
108
+ 0.9,
109
+ 0.95
110
+ ],
111
+ "min_lr": 1e-06,
112
+ "warmup_epochs": 5
113
+ },
114
+ "checkpoint": {
115
+ "resume": "",
116
+ "auto_resume": true,
117
+ "save_interval": 1,
118
+ "keep_last": 3
119
+ },
120
+ "logging": {
121
+ "enable_wandb": false,
122
+ "entity": "",
123
+ "project": "diffusion-decoder",
124
+ "run_name": "diffusion_decoder_imagenet256"
125
+ }
126
+ }
baseline/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_141611.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T14:16:11",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 310.2751159667969,
9
+ "ginception/score": 1.1903746128082275,
10
+ "ginception/std": 0.14354869723320007
11
+ }
12
+ }
baseline/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_142846.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T14:28:46",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 238.10910034179688,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 238.10910034179688,
10
+ "rinception/score": 1.5671464204788208,
11
+ "rinception/std": 0.2834608256816864,
12
+ "rl1/score": 0.3674238417053223,
13
+ "rlpips/score": 0.8003478924560546,
14
+ "rpsnr/score": 7.616721407775879
15
+ }
16
+ }
baseline/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_163636.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T16:36:36",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 398.9952392578125,
9
+ "ginception/score": 1.0350425243377686,
10
+ "ginception/std": 0.016915658488869667
11
+ }
12
+ }
baseline/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_164909.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T16:49:09",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 48.23517608642578,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 48.23517608642578,
10
+ "rinception/score": 24.853290557861328,
11
+ "rinception/std": 5.148846626281738,
12
+ "rl1/score": 0.21535656043052673,
13
+ "rlpips/score": 0.5141746402549744,
14
+ "rpsnr/score": 12.606189897460938
15
+ }
16
+ }
baseline/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_185706.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T18:57:06",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 315.4123840332031,
9
+ "ginception/score": 1.8223289251327515,
10
+ "ginception/std": 0.43513810634613037
11
+ }
12
+ }
baseline/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_190941.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T19:09:41",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 2.729315757751465,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 2.729315757751465,
10
+ "rinception/score": 56.801666259765625,
11
+ "rinception/std": 7.076909065246582,
12
+ "rl1/score": 0.10776259976387025,
13
+ "rlpips/score": 0.20746465351104737,
14
+ "rpsnr/score": 18.78563266845703
15
+ }
16
+ }
baseline/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211749.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T21:17:49",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 100.38690948486328,
9
+ "ginception/score": 9.55769157409668,
10
+ "ginception/std": 0.47217872738838196
11
+ }
12
+ }
baseline/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213021.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T21:30:21",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.2609989643096924,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.2609989643096924,
10
+ "rinception/score": 58.3740234375,
11
+ "rinception/std": 7.179546356201172,
12
+ "rl1/score": 0.04572096435785294,
13
+ "rlpips/score": 0.1400455956363678,
14
+ "rpsnr/score": 25.163568264160155
15
+ }
16
+ }
baseline/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_233832.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:38:32",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 71.64373779296875,
9
+ "ginception/score": 17.276798248291016,
10
+ "ginception/std": 0.2812263071537018
11
+ }
12
+ }
baseline/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_235105.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:51:05",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.8307203054428101,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.8307203054428101,
10
+ "rinception/score": 59.03937911987305,
11
+ "rinception/std": 7.30922794342041,
12
+ "rl1/score": 0.03047736176252365,
13
+ "rlpips/score": 0.1192132255268097,
14
+ "rpsnr/score": 27.93316936279297
15
+ }
16
+ }
baseline/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_015904.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-29T01:59:04",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 59.03805160522461,
9
+ "ginception/score": 23.49216079711914,
10
+ "ginception/std": 0.4643714427947998
11
+ }
12
+ }
baseline/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_021139.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-29T02:11:39",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.6298550963401794,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.6298550963401794,
10
+ "rinception/score": 59.487831115722656,
11
+ "rinception/std": 7.161473751068115,
12
+ "rl1/score": 0.028371377193927766,
13
+ "rlpips/score": 0.11277780175209046,
14
+ "rpsnr/score": 28.530626791992187
15
+ }
16
+ }
baseline/logs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
baseline/training_metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
baseline_dinov3_uf/config.resolved.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "output_dir": "work_dirs/baseline_dinov3_uf",
4
+ "model": {
5
+ "img_size": 256,
6
+ "input_range": "minus_one_one",
7
+ "num_classes": 1000,
8
+ "encoder_type": "dinov3",
9
+ "encoder_model_size": "base",
10
+ "encoder_patch_size": 16,
11
+ "freeze_encoder_backbone": false,
12
+ "token_channels": 128,
13
+ "mask_ratio": 0.0,
14
+ "mask_ratio_min": 0.0,
15
+ "mask_ratio_type": "random",
16
+ "use_qknorm_encoder": false,
17
+ "latent_hw": 16,
18
+ "decoder_model": "JiTCoT-B/16",
19
+ "decoder_patch_size": 16,
20
+ "bottleneck_dim_latent": 128,
21
+ "dh_depth": 2,
22
+ "dh_hidden_size": 1024,
23
+ "attn_dropout": 0.0,
24
+ "proj_dropout": 0.0,
25
+ "enable_ema": true,
26
+ "ema_decay1": 0.9999,
27
+ "ema_decay2": 0.9998,
28
+ "label_drop_prob": 0.1,
29
+ "P_mean": -0.4,
30
+ "P_std": 0.8,
31
+ "latent_mean": -1.2,
32
+ "latent_std": 1.0,
33
+ "latent_weight": 1.0,
34
+ "choose_latent_p": 0.4,
35
+ "perceptual_weight": 1.0,
36
+ "perceptual_net": "lpips-convnext_s-1.0-0.1",
37
+ "sample_mode": "latent_first_cascaded_noised",
38
+ "latent_max_t": 1.0,
39
+ "latent_pixel_offset": 0.0,
40
+ "latent_pixel_shift": 1.0,
41
+ "t_eps": 0.05,
42
+ "t_eps_inference": 0.05,
43
+ "noise_scale": 1.0,
44
+ "sampling_method": "heun",
45
+ "num_sampling_steps": 50,
46
+ "cfg": 1.0,
47
+ "cfg_latent": 1.0,
48
+ "interval_min": 0.0,
49
+ "interval_max": 1.0,
50
+ "interval_min_latent": 0.0,
51
+ "interval_max_latent": 1.0,
52
+ "gen_shift_pixel": 1.0,
53
+ "gen_shift_latent": 1.0,
54
+ "guidance_method": "cfg"
55
+ },
56
+ "data": {
57
+ "train_dir": "data/imagenet/train",
58
+ "val_dir": "data/imagenet/val",
59
+ "num_workers": 8,
60
+ "pin_memory": true,
61
+ "persistent_workers": true
62
+ },
63
+ "train": {
64
+ "epochs": 200,
65
+ "global_batch_size": 1024,
66
+ "eval_global_batch_size": 1024,
67
+ "grad_accum_steps": 1,
68
+ "grad_clip": 3.0,
69
+ "amp_dtype": "bf16",
70
+ "log_interval": 50
71
+ },
72
+ "visualization": {
73
+ "initial_visualization": true,
74
+ "vis_interval": 500,
75
+ "visualize_reconstruction": true,
76
+ "visualize_generation": true
77
+ },
78
+ "eval": {
79
+ "initial_eval": {
80
+ "reconstruction": false,
81
+ "generation": false
82
+ },
83
+ "gfid_interval": 10,
84
+ "rfid_interval": 10,
85
+ "gfid_stats_path": "fid_stats/jit_in256_stats.npz",
86
+ "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
87
+ "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
88
+ "gfid_backend": "online",
89
+ "gfid_num_classes": 1000,
90
+ "gfid_num_images": 50000,
91
+ "rfid_num_images": 50000,
92
+ "batch_size": 64,
93
+ "num_workers": 8,
94
+ "gfid_metric_verbose": false,
95
+ "gfid_keep_images": false,
96
+ "gfid_cfg_scale": null,
97
+ "gfid_cfg_scale_latent": null,
98
+ "gfid_cfg_interval": null,
99
+ "gfid_cfg_interval_latent": null,
100
+ "gfid_steps": null,
101
+ "eval_ema": "1"
102
+ },
103
+ "optim": {
104
+ "name": "adamw",
105
+ "lr": 0.0001,
106
+ "lr_schedule": "constant",
107
+ "weight_decay": 0.0,
108
+ "betas": [
109
+ 0.9,
110
+ 0.95
111
+ ],
112
+ "min_lr": 1e-06,
113
+ "warmup_epochs": 5
114
+ },
115
+ "checkpoint": {
116
+ "resume": "",
117
+ "auto_resume": true,
118
+ "save_interval": 1,
119
+ "keep_last": 3
120
+ },
121
+ "logging": {
122
+ "enable_wandb": false,
123
+ "entity": "",
124
+ "project": "diffusion-decoder",
125
+ "run_name": "diffusion_decoder_imagenet256"
126
+ }
127
+ }
baseline_dinov3_uf/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_150031.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T15:00:31",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 318.4571228027344,
9
+ "ginception/score": 1.1044092178344727,
10
+ "ginception/std": 0.051184553653001785
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_151306.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T15:13:06",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 296.68890380859375,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 296.68890380859375,
10
+ "rinception/score": 1.2041164636611938,
11
+ "rinception/std": 0.06055246293544769,
12
+ "rl1/score": 0.2864151106643677,
13
+ "rlpips/score": 0.7054891017150879,
14
+ "rpsnr/score": 9.468088702392578
15
+ }
16
+ }
baseline_dinov3_uf/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_170639.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T17:06:39",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 377.2621765136719,
9
+ "ginception/score": 1.0861103534698486,
10
+ "ginception/std": 0.06107068806886673
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_171913.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T17:19:13",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 33.72285842895508,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 33.72285842895508,
10
+ "rinception/score": 29.16294288635254,
11
+ "rinception/std": 4.797229766845703,
12
+ "rl1/score": 0.18163616892814635,
13
+ "rlpips/score": 0.4273777462387085,
14
+ "rpsnr/score": 13.872778917236328
15
+ }
16
+ }
baseline_dinov3_uf/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_191303.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T19:13:03",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 118.85285186767578,
9
+ "ginception/score": 6.589343070983887,
10
+ "ginception/std": 0.2932935953140259
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_192539.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T19:25:39",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 2.975400686264038,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 2.975400686264038,
10
+ "rinception/score": 55.23002243041992,
11
+ "rinception/std": 7.260276794433594,
12
+ "rl1/score": 0.06078192769527435,
13
+ "rlpips/score": 0.208553619556427,
14
+ "rpsnr/score": 21.92944625
15
+ }
16
+ }
baseline_dinov3_uf/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211926.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T21:19:26",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 90.12359619140625,
9
+ "ginception/score": 12.53602123260498,
10
+ "ginception/std": 0.1810392588376999
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213159.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T21:31:59",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.3650418519973755,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.3650418519973755,
10
+ "rinception/score": 57.91691970825195,
11
+ "rinception/std": 7.2867231369018555,
12
+ "rl1/score": 0.047244364943504334,
13
+ "rlpips/score": 0.16791890050888061,
14
+ "rpsnr/score": 24.01355887939453
15
+ }
16
+ }
baseline_dinov3_uf/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_232551.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:25:51",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 69.93110656738281,
9
+ "ginception/score": 18.20962905883789,
10
+ "ginception/std": 0.366786926984787
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_233826.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:38:26",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.8765761852264404,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.8765761852264404,
10
+ "rinception/score": 58.834983825683594,
11
+ "rinception/std": 7.452160835266113,
12
+ "rl1/score": 0.038886987855434416,
13
+ "rlpips/score": 0.15030842846870424,
14
+ "rpsnr/score": 25.484861188964842
15
+ }
16
+ }
baseline_dinov3_uf/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_013229.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-29T01:32:29",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_generation",
6
+ "stats": {
7
+ "gfid/num_images": 50000.0,
8
+ "gfid/score": 61.83375549316406,
9
+ "ginception/score": 21.42015266418457,
10
+ "ginception/std": 0.40415942668914795
11
+ }
12
+ }
baseline_dinov3_uf/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_014506.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-29T01:45:06",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.6807690858840942,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.6807690858840942,
10
+ "rinception/score": 59.302391052246094,
11
+ "rinception/std": 7.438356876373291,
12
+ "rl1/score": 0.03433100723981857,
13
+ "rlpips/score": 0.1406316768550873,
14
+ "rpsnr/score": 26.435069028320314
15
+ }
16
+ }
baseline_dinov3_uf/logs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
baseline_dinov3_uf/training_metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
rec_only_dinov3/config.resolved.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "output_dir": "work_dirs/rec_only_dinov3",
4
+ "model": {
5
+ "img_size": 256,
6
+ "input_range": "minus_one_one",
7
+ "num_classes": 1000,
8
+ "encoder_type": "dinov3",
9
+ "encoder_model_size": "base",
10
+ "encoder_patch_size": 16,
11
+ "freeze_encoder_backbone": true,
12
+ "token_channels": 128,
13
+ "mask_ratio": 0.0,
14
+ "mask_ratio_min": 0.0,
15
+ "mask_ratio_type": "random",
16
+ "use_qknorm_encoder": false,
17
+ "latent_hw": 16,
18
+ "decoder_model": "JiTCoT-B/16",
19
+ "decoder_patch_size": 16,
20
+ "bottleneck_dim_latent": 128,
21
+ "dh_depth": 2,
22
+ "dh_hidden_size": 1024,
23
+ "attn_dropout": 0.0,
24
+ "proj_dropout": 0.0,
25
+ "enable_ema": true,
26
+ "ema_decay1": 0.9999,
27
+ "ema_decay2": 0.9998,
28
+ "label_drop_prob": 0.1,
29
+ "P_mean": -0.4,
30
+ "P_std": 0.8,
31
+ "latent_mean": -1.2,
32
+ "latent_std": 1.0,
33
+ "latent_weight": 0.0,
34
+ "perceptual_weight": 1.0,
35
+ "perceptual_net": "lpips-convnext_s-1.0-0.1",
36
+ "choose_latent_p": 0.0,
37
+ "sample_mode": "reconstruction_only",
38
+ "latent_max_t": 1.0,
39
+ "latent_pixel_offset": 0.0,
40
+ "latent_pixel_shift": 1.0,
41
+ "t_eps": 0.05,
42
+ "t_eps_inference": 0.05,
43
+ "noise_scale": 1.0,
44
+ "sampling_method": "heun",
45
+ "num_sampling_steps": 50,
46
+ "cfg": 1.0,
47
+ "cfg_latent": 1.0,
48
+ "interval_min": 0.0,
49
+ "interval_max": 1.0,
50
+ "interval_min_latent": 0.0,
51
+ "interval_max_latent": 1.0,
52
+ "gen_shift_pixel": 1.0,
53
+ "gen_shift_latent": 1.0,
54
+ "guidance_method": "cfg"
55
+ },
56
+ "data": {
57
+ "train_dir": "data/imagenet/train",
58
+ "val_dir": "data/imagenet/val",
59
+ "num_workers": 8,
60
+ "pin_memory": true,
61
+ "persistent_workers": true
62
+ },
63
+ "train": {
64
+ "epochs": 200,
65
+ "global_batch_size": 1024,
66
+ "eval_global_batch_size": 1024,
67
+ "grad_accum_steps": 1,
68
+ "grad_clip": 3.0,
69
+ "amp_dtype": "bf16",
70
+ "log_interval": 50
71
+ },
72
+ "visualization": {
73
+ "initial_visualization": true,
74
+ "vis_interval": 500,
75
+ "visualize_reconstruction": true,
76
+ "visualize_generation": false
77
+ },
78
+ "eval": {
79
+ "initial_eval": {
80
+ "reconstruction": false,
81
+ "generation": false
82
+ },
83
+ "gfid_interval": 0,
84
+ "rfid_interval": 10,
85
+ "gfid_stats_path": "",
86
+ "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
87
+ "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
88
+ "gfid_backend": "online",
89
+ "gfid_num_classes": 1000,
90
+ "gfid_num_images": 50000,
91
+ "rfid_num_images": 50000,
92
+ "batch_size": 128,
93
+ "num_workers": 8,
94
+ "gfid_metric_verbose": false,
95
+ "gfid_keep_images": false,
96
+ "gfid_cfg_scale": null,
97
+ "gfid_cfg_scale_latent": null,
98
+ "gfid_cfg_interval": null,
99
+ "gfid_cfg_interval_latent": null,
100
+ "gfid_steps": null,
101
+ "eval_ema": "1"
102
+ },
103
+ "optim": {
104
+ "name": "adamw",
105
+ "lr": 0.0001,
106
+ "lr_schedule": "constant",
107
+ "weight_decay": 0.0,
108
+ "betas": [
109
+ 0.9,
110
+ 0.95
111
+ ],
112
+ "min_lr": 1e-06,
113
+ "warmup_epochs": 5
114
+ },
115
+ "checkpoint": {
116
+ "resume": "",
117
+ "auto_resume": true,
118
+ "save_interval": 1,
119
+ "keep_last": 3
120
+ },
121
+ "logging": {
122
+ "enable_wandb": false,
123
+ "entity": "",
124
+ "project": "diffusion-decoder",
125
+ "run_name": "jitcot_rec_only_vgg_convnext_imagenet256"
126
+ }
127
+ }
rec_only_dinov3/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_110622.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T11:06:22",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 299.427001953125,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 299.427001953125,
10
+ "rinception/score": 1.1586003303527832,
11
+ "rinception/std": 0.14029201865196228,
12
+ "rl1/score": 0.3669631064605713,
13
+ "rlpips/score": 0.799619822769165,
14
+ "rpsnr/score": 7.547315838012695
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_124235.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T12:42:35",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 22.071176528930664,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 22.071176528930664,
10
+ "rinception/score": 43.34564971923828,
11
+ "rinception/std": 3.7558352947235107,
12
+ "rl1/score": 0.2071618295097351,
13
+ "rlpips/score": 0.5011178592300415,
14
+ "rpsnr/score": 12.862273885498047
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_141847.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T14:18:47",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 9.413352012634277,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 9.413352012634277,
10
+ "rinception/score": 53.314453125,
11
+ "rinception/std": 6.155331134796143,
12
+ "rl1/score": 0.18956555074691772,
13
+ "rlpips/score": 0.45460860481262205,
14
+ "rpsnr/score": 13.669592930908204
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_155459.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T15:54:59",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 2.9257969856262207,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 2.9257969856262207,
10
+ "rinception/score": 57.289398193359375,
11
+ "rinception/std": 7.0953593254089355,
12
+ "rl1/score": 0.1296201045036316,
13
+ "rlpips/score": 0.3560291623687744,
14
+ "rpsnr/score": 16.702779267578126
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_173111.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T17:31:11",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.5682669878005981,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.5682669878005981,
10
+ "rinception/score": 58.81327438354492,
11
+ "rinception/std": 7.187229633331299,
12
+ "rl1/score": 0.10405946157455444,
13
+ "rlpips/score": 0.31758854347229004,
14
+ "rpsnr/score": 17.98356410522461
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_190723.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T19:07:23",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.3132721185684204,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.3132721185684204,
10
+ "rinception/score": 59.43439483642578,
11
+ "rinception/std": 7.511739253997803,
12
+ "rl1/score": 0.09370932280540466,
13
+ "rlpips/score": 0.2997593156814575,
14
+ "rpsnr/score": 18.51156289794922
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0007_epoch_0070_reconstruction_step_00087570_20260528_204340.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T20:43:40",
3
+ "epoch": 70,
4
+ "global_step": 87570,
5
+ "name": "epoch_0070_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.2280447483062744,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.2280447483062744,
10
+ "rinception/score": 59.451393127441406,
11
+ "rinception/std": 7.455377578735352,
12
+ "rl1/score": 0.09087045845985413,
13
+ "rlpips/score": 0.2918792335700989,
14
+ "rpsnr/score": 18.59110985961914
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0008_epoch_0080_reconstruction_step_00100080_20260528_221953.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T22:19:53",
3
+ "epoch": 80,
4
+ "global_step": 100080,
5
+ "name": "epoch_0080_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.2123676538467407,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.2123676538467407,
10
+ "rinception/score": 59.36212921142578,
11
+ "rinception/std": 7.355563163757324,
12
+ "rl1/score": 0.08963097463607789,
13
+ "rlpips/score": 0.28763298891067507,
14
+ "rpsnr/score": 18.55867121826172
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0009_epoch_0090_reconstruction_step_00112590_20260528_235612.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:56:12",
3
+ "epoch": 90,
4
+ "global_step": 112590,
5
+ "name": "epoch_0090_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.285521388053894,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.285521388053894,
10
+ "rinception/score": 59.30804443359375,
11
+ "rinception/std": 7.406274318695068,
12
+ "rl1/score": 0.09017072152137756,
13
+ "rlpips/score": 0.2866918243408203,
14
+ "rpsnr/score": 18.41382909301758
15
+ }
16
+ }
rec_only_dinov3/eval_results/eval_0010_epoch_0100_reconstruction_step_00125100_20260529_013235.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-29T01:32:35",
3
+ "epoch": 100,
4
+ "global_step": 125100,
5
+ "name": "epoch_0100_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 1.1464312076568604,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 1.1464312076568604,
10
+ "rinception/score": 59.60105514526367,
11
+ "rinception/std": 7.36441707611084,
12
+ "rl1/score": 0.08871454634666442,
13
+ "rlpips/score": 0.2819660251617432,
14
+ "rpsnr/score": 18.57816102661133
15
+ }
16
+ }
rec_only_dinov3/logs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
rec_only_dinov3/training_metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
rec_only_dinov3_uf/config.resolved.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "output_dir": "work_dirs/rec_only_dinov3_uf",
4
+ "model": {
5
+ "img_size": 256,
6
+ "input_range": "minus_one_one",
7
+ "num_classes": 1000,
8
+ "encoder_type": "dinov3",
9
+ "encoder_model_size": "base",
10
+ "encoder_patch_size": 16,
11
+ "freeze_encoder_backbone": false,
12
+ "token_channels": 128,
13
+ "mask_ratio": 0.0,
14
+ "mask_ratio_min": 0.0,
15
+ "mask_ratio_type": "random",
16
+ "use_qknorm_encoder": false,
17
+ "latent_hw": 16,
18
+ "decoder_model": "JiTCoT-B/16",
19
+ "decoder_patch_size": 16,
20
+ "bottleneck_dim_latent": 128,
21
+ "dh_depth": 2,
22
+ "dh_hidden_size": 1024,
23
+ "attn_dropout": 0.0,
24
+ "proj_dropout": 0.0,
25
+ "enable_ema": true,
26
+ "ema_decay1": 0.9999,
27
+ "ema_decay2": 0.9998,
28
+ "label_drop_prob": 0.1,
29
+ "P_mean": -0.4,
30
+ "P_std": 0.8,
31
+ "latent_mean": -1.2,
32
+ "latent_std": 1.0,
33
+ "latent_weight": 0.0,
34
+ "perceptual_weight": 1.0,
35
+ "perceptual_net": "lpips-convnext_s-1.0-0.1",
36
+ "choose_latent_p": 0.0,
37
+ "sample_mode": "reconstruction_only",
38
+ "latent_max_t": 1.0,
39
+ "latent_pixel_offset": 0.0,
40
+ "latent_pixel_shift": 1.0,
41
+ "t_eps": 0.05,
42
+ "t_eps_inference": 0.05,
43
+ "noise_scale": 1.0,
44
+ "sampling_method": "heun",
45
+ "num_sampling_steps": 50,
46
+ "cfg": 1.0,
47
+ "cfg_latent": 1.0,
48
+ "interval_min": 0.0,
49
+ "interval_max": 1.0,
50
+ "interval_min_latent": 0.0,
51
+ "interval_max_latent": 1.0,
52
+ "gen_shift_pixel": 1.0,
53
+ "gen_shift_latent": 1.0,
54
+ "guidance_method": "cfg"
55
+ },
56
+ "data": {
57
+ "train_dir": "data/imagenet/train",
58
+ "val_dir": "data/imagenet/val",
59
+ "num_workers": 8,
60
+ "pin_memory": true,
61
+ "persistent_workers": true
62
+ },
63
+ "train": {
64
+ "epochs": 200,
65
+ "global_batch_size": 1024,
66
+ "eval_global_batch_size": 1024,
67
+ "grad_accum_steps": 1,
68
+ "grad_clip": 3.0,
69
+ "amp_dtype": "bf16",
70
+ "log_interval": 50
71
+ },
72
+ "visualization": {
73
+ "initial_visualization": true,
74
+ "vis_interval": 500,
75
+ "visualize_reconstruction": true,
76
+ "visualize_generation": false
77
+ },
78
+ "eval": {
79
+ "initial_eval": {
80
+ "reconstruction": false,
81
+ "generation": false
82
+ },
83
+ "gfid_interval": 0,
84
+ "rfid_interval": 10,
85
+ "gfid_stats_path": "",
86
+ "rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
87
+ "inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
88
+ "gfid_backend": "online",
89
+ "gfid_num_classes": 1000,
90
+ "gfid_num_images": 50000,
91
+ "rfid_num_images": 50000,
92
+ "batch_size": 128,
93
+ "num_workers": 8,
94
+ "gfid_metric_verbose": false,
95
+ "gfid_keep_images": false,
96
+ "gfid_cfg_scale": null,
97
+ "gfid_cfg_scale_latent": null,
98
+ "gfid_cfg_interval": null,
99
+ "gfid_cfg_interval_latent": null,
100
+ "gfid_steps": null,
101
+ "eval_ema": "1"
102
+ },
103
+ "optim": {
104
+ "name": "adamw",
105
+ "lr": 0.0001,
106
+ "lr_schedule": "constant",
107
+ "weight_decay": 0.0,
108
+ "betas": [
109
+ 0.9,
110
+ 0.95
111
+ ],
112
+ "min_lr": 1e-06,
113
+ "warmup_epochs": 5
114
+ },
115
+ "checkpoint": {
116
+ "resume": "",
117
+ "auto_resume": true,
118
+ "save_interval": 1,
119
+ "keep_last": 3
120
+ },
121
+ "logging": {
122
+ "enable_wandb": false,
123
+ "entity": "",
124
+ "project": "diffusion-decoder",
125
+ "run_name": "jitcot_rec_only_vgg_convnext_imagenet256"
126
+ }
127
+ }
rec_only_dinov3_uf/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_134902.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T13:49:02",
3
+ "epoch": 10,
4
+ "global_step": 12510,
5
+ "name": "epoch_0010_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 143.6580352783203,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 143.6580352783203,
10
+ "rinception/score": 8.937095642089844,
11
+ "rinception/std": 1.9498242139816284,
12
+ "rl1/score": 0.12238822456359863,
13
+ "rlpips/score": 0.5494522420501708,
14
+ "rpsnr/score": 16.949355013427734
15
+ }
16
+ }
rec_only_dinov3_uf/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_153954.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T15:39:54",
3
+ "epoch": 20,
4
+ "global_step": 25020,
5
+ "name": "epoch_0020_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 5.159643173217773,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 5.159643173217773,
10
+ "rinception/score": 56.56890869140625,
11
+ "rinception/std": 6.62244987487793,
12
+ "rl1/score": 0.04988961963653565,
13
+ "rlpips/score": 0.21052068838119506,
14
+ "rpsnr/score": 24.29544946533203
15
+ }
16
+ }
rec_only_dinov3_uf/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_173108.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T17:31:08",
3
+ "epoch": 30,
4
+ "global_step": 37530,
5
+ "name": "epoch_0030_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.9540103673934937,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.9540103673934937,
10
+ "rinception/score": 60.06682205200195,
11
+ "rinception/std": 7.526337146759033,
12
+ "rl1/score": 0.029459762790203094,
13
+ "rlpips/score": 0.11677609906196594,
14
+ "rpsnr/score": 28.49137232421875
15
+ }
16
+ }
rec_only_dinov3_uf/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_192208.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T19:22:08",
3
+ "epoch": 40,
4
+ "global_step": 50040,
5
+ "name": "epoch_0040_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.450069397687912,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.450069397687912,
10
+ "rinception/score": 60.43627166748047,
11
+ "rinception/std": 7.46042013168335,
12
+ "rl1/score": 0.0233486088180542,
13
+ "rlpips/score": 0.09246708666801452,
14
+ "rpsnr/score": 30.35999076904297
15
+ }
16
+ }
rec_only_dinov3_uf/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_211317.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T21:13:17",
3
+ "epoch": 50,
4
+ "global_step": 62550,
5
+ "name": "epoch_0050_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.32373496890068054,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.32373496890068054,
10
+ "rinception/score": 60.77216339111328,
11
+ "rinception/std": 7.522408962249756,
12
+ "rl1/score": 0.02103063518643379,
13
+ "rlpips/score": 0.08168746644496917,
14
+ "rpsnr/score": 31.230824963378907
15
+ }
16
+ }
rec_only_dinov3_uf/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_230434.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-28T23:04:34",
3
+ "epoch": 60,
4
+ "global_step": 75060,
5
+ "name": "epoch_0060_reconstruction",
6
+ "stats": {
7
+ "rfid-val/score": 0.2859066128730774,
8
+ "rfid/num_images": 50000.0,
9
+ "rfid/score": 0.2859066128730774,
10
+ "rinception/score": 60.9921875,
11
+ "rinception/std": 7.5456461906433105,
12
+ "rl1/score": 0.02010475501537323,
13
+ "rlpips/score": 0.07721434003353118,
14
+ "rpsnr/score": 31.611067966308593
15
+ }
16
+ }