Upload files excluding .pt
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- baseline/config.resolved.json +126 -0
- baseline/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_141611.txt +12 -0
- baseline/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_142846.txt +16 -0
- baseline/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_163636.txt +12 -0
- baseline/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_164909.txt +16 -0
- baseline/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_185706.txt +12 -0
- baseline/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_190941.txt +16 -0
- baseline/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211749.txt +12 -0
- baseline/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213021.txt +16 -0
- baseline/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_233832.txt +12 -0
- baseline/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_235105.txt +16 -0
- baseline/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_015904.txt +12 -0
- baseline/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_021139.txt +16 -0
- baseline/logs/log.txt +0 -0
- baseline/training_metrics.json +0 -0
- baseline_dinov3_uf/config.resolved.json +127 -0
- baseline_dinov3_uf/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_150031.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_151306.txt +16 -0
- baseline_dinov3_uf/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_170639.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_171913.txt +16 -0
- baseline_dinov3_uf/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_191303.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_192539.txt +16 -0
- baseline_dinov3_uf/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211926.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213159.txt +16 -0
- baseline_dinov3_uf/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_232551.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_233826.txt +16 -0
- baseline_dinov3_uf/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_013229.txt +12 -0
- baseline_dinov3_uf/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_014506.txt +16 -0
- baseline_dinov3_uf/logs/log.txt +0 -0
- baseline_dinov3_uf/training_metrics.json +0 -0
- rec_only_dinov3/config.resolved.json +127 -0
- rec_only_dinov3/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_110622.txt +16 -0
- rec_only_dinov3/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_124235.txt +16 -0
- rec_only_dinov3/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_141847.txt +16 -0
- rec_only_dinov3/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_155459.txt +16 -0
- rec_only_dinov3/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_173111.txt +16 -0
- rec_only_dinov3/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_190723.txt +16 -0
- rec_only_dinov3/eval_results/eval_0007_epoch_0070_reconstruction_step_00087570_20260528_204340.txt +16 -0
- rec_only_dinov3/eval_results/eval_0008_epoch_0080_reconstruction_step_00100080_20260528_221953.txt +16 -0
- rec_only_dinov3/eval_results/eval_0009_epoch_0090_reconstruction_step_00112590_20260528_235612.txt +16 -0
- rec_only_dinov3/eval_results/eval_0010_epoch_0100_reconstruction_step_00125100_20260529_013235.txt +16 -0
- rec_only_dinov3/logs/log.txt +0 -0
- rec_only_dinov3/training_metrics.json +0 -0
- rec_only_dinov3_uf/config.resolved.json +127 -0
- rec_only_dinov3_uf/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_134902.txt +16 -0
- rec_only_dinov3_uf/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_153954.txt +16 -0
- rec_only_dinov3_uf/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_173108.txt +16 -0
- rec_only_dinov3_uf/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_192208.txt +16 -0
- rec_only_dinov3_uf/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_211317.txt +16 -0
- rec_only_dinov3_uf/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_230434.txt +16 -0
baseline/config.resolved.json
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"seed": 42,
|
| 3 |
+
"output_dir": "work_dirs/baseline",
|
| 4 |
+
"model": {
|
| 5 |
+
"img_size": 256,
|
| 6 |
+
"input_range": "minus_one_one",
|
| 7 |
+
"num_classes": 1000,
|
| 8 |
+
"encoder_type": "rectok",
|
| 9 |
+
"encoder_model_size": "base",
|
| 10 |
+
"encoder_patch_size": 16,
|
| 11 |
+
"token_channels": 128,
|
| 12 |
+
"mask_ratio": 0.4,
|
| 13 |
+
"mask_ratio_min": -0.1,
|
| 14 |
+
"mask_ratio_type": "random",
|
| 15 |
+
"use_qknorm_encoder": false,
|
| 16 |
+
"latent_hw": 16,
|
| 17 |
+
"decoder_model": "JiTCoT-B/16",
|
| 18 |
+
"decoder_patch_size": 16,
|
| 19 |
+
"bottleneck_dim_latent": 128,
|
| 20 |
+
"dh_depth": 2,
|
| 21 |
+
"dh_hidden_size": 1024,
|
| 22 |
+
"attn_dropout": 0.0,
|
| 23 |
+
"proj_dropout": 0.0,
|
| 24 |
+
"enable_ema": true,
|
| 25 |
+
"ema_decay1": 0.9999,
|
| 26 |
+
"ema_decay2": 0.9998,
|
| 27 |
+
"label_drop_prob": 0.1,
|
| 28 |
+
"P_mean": -0.4,
|
| 29 |
+
"P_std": 0.8,
|
| 30 |
+
"latent_mean": -1.2,
|
| 31 |
+
"latent_std": 1.0,
|
| 32 |
+
"latent_weight": 1.0,
|
| 33 |
+
"choose_latent_p": 0.4,
|
| 34 |
+
"perceptual_weight": 1.0,
|
| 35 |
+
"perceptual_net": "lpips-convnext_s-1.0-0.1",
|
| 36 |
+
"sample_mode": "latent_first_cascaded_noised",
|
| 37 |
+
"latent_max_t": 1.0,
|
| 38 |
+
"latent_pixel_offset": 0.0,
|
| 39 |
+
"latent_pixel_shift": 1.0,
|
| 40 |
+
"t_eps": 0.05,
|
| 41 |
+
"t_eps_inference": 0.05,
|
| 42 |
+
"noise_scale": 1.0,
|
| 43 |
+
"sampling_method": "heun",
|
| 44 |
+
"num_sampling_steps": 50,
|
| 45 |
+
"cfg": 1.0,
|
| 46 |
+
"cfg_latent": 1.0,
|
| 47 |
+
"interval_min": 0.0,
|
| 48 |
+
"interval_max": 1.0,
|
| 49 |
+
"interval_min_latent": 0.0,
|
| 50 |
+
"interval_max_latent": 1.0,
|
| 51 |
+
"gen_shift_pixel": 1.0,
|
| 52 |
+
"gen_shift_latent": 1.0,
|
| 53 |
+
"guidance_method": "cfg"
|
| 54 |
+
},
|
| 55 |
+
"data": {
|
| 56 |
+
"train_dir": "data/imagenet/train",
|
| 57 |
+
"val_dir": "data/imagenet/val",
|
| 58 |
+
"num_workers": 8,
|
| 59 |
+
"pin_memory": true,
|
| 60 |
+
"persistent_workers": true
|
| 61 |
+
},
|
| 62 |
+
"train": {
|
| 63 |
+
"epochs": 200,
|
| 64 |
+
"global_batch_size": 1024,
|
| 65 |
+
"eval_global_batch_size": 1024,
|
| 66 |
+
"grad_accum_steps": 1,
|
| 67 |
+
"grad_clip": 3.0,
|
| 68 |
+
"amp_dtype": "bf16",
|
| 69 |
+
"log_interval": 50
|
| 70 |
+
},
|
| 71 |
+
"visualization": {
|
| 72 |
+
"initial_visualization": true,
|
| 73 |
+
"vis_interval": 100,
|
| 74 |
+
"visualize_reconstruction": true,
|
| 75 |
+
"visualize_generation": true
|
| 76 |
+
},
|
| 77 |
+
"eval": {
|
| 78 |
+
"initial_eval": {
|
| 79 |
+
"reconstruction": false,
|
| 80 |
+
"generation": false
|
| 81 |
+
},
|
| 82 |
+
"gfid_interval": 10,
|
| 83 |
+
"rfid_interval": 10,
|
| 84 |
+
"gfid_stats_path": "fid_stats/jit_in256_stats.npz",
|
| 85 |
+
"rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
|
| 86 |
+
"inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
|
| 87 |
+
"gfid_backend": "online",
|
| 88 |
+
"gfid_num_classes": 1000,
|
| 89 |
+
"gfid_num_images": 50000,
|
| 90 |
+
"rfid_num_images": 50000,
|
| 91 |
+
"batch_size": 64,
|
| 92 |
+
"num_workers": 8,
|
| 93 |
+
"gfid_metric_verbose": false,
|
| 94 |
+
"gfid_keep_images": false,
|
| 95 |
+
"gfid_cfg_scale": null,
|
| 96 |
+
"gfid_cfg_scale_latent": null,
|
| 97 |
+
"gfid_cfg_interval": null,
|
| 98 |
+
"gfid_cfg_interval_latent": null,
|
| 99 |
+
"gfid_steps": null,
|
| 100 |
+
"eval_ema": "1"
|
| 101 |
+
},
|
| 102 |
+
"optim": {
|
| 103 |
+
"name": "adamw",
|
| 104 |
+
"lr": 0.0001,
|
| 105 |
+
"lr_schedule": "constant",
|
| 106 |
+
"weight_decay": 0.0,
|
| 107 |
+
"betas": [
|
| 108 |
+
0.9,
|
| 109 |
+
0.95
|
| 110 |
+
],
|
| 111 |
+
"min_lr": 1e-06,
|
| 112 |
+
"warmup_epochs": 5
|
| 113 |
+
},
|
| 114 |
+
"checkpoint": {
|
| 115 |
+
"resume": "",
|
| 116 |
+
"auto_resume": true,
|
| 117 |
+
"save_interval": 1,
|
| 118 |
+
"keep_last": 3
|
| 119 |
+
},
|
| 120 |
+
"logging": {
|
| 121 |
+
"enable_wandb": false,
|
| 122 |
+
"entity": "",
|
| 123 |
+
"project": "diffusion-decoder",
|
| 124 |
+
"run_name": "diffusion_decoder_imagenet256"
|
| 125 |
+
}
|
| 126 |
+
}
|
baseline/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_141611.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T14:16:11",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 310.2751159667969,
|
| 9 |
+
"ginception/score": 1.1903746128082275,
|
| 10 |
+
"ginception/std": 0.14354869723320007
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_142846.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T14:28:46",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 238.10910034179688,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 238.10910034179688,
|
| 10 |
+
"rinception/score": 1.5671464204788208,
|
| 11 |
+
"rinception/std": 0.2834608256816864,
|
| 12 |
+
"rl1/score": 0.3674238417053223,
|
| 13 |
+
"rlpips/score": 0.8003478924560546,
|
| 14 |
+
"rpsnr/score": 7.616721407775879
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_163636.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T16:36:36",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 398.9952392578125,
|
| 9 |
+
"ginception/score": 1.0350425243377686,
|
| 10 |
+
"ginception/std": 0.016915658488869667
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_164909.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T16:49:09",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 48.23517608642578,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 48.23517608642578,
|
| 10 |
+
"rinception/score": 24.853290557861328,
|
| 11 |
+
"rinception/std": 5.148846626281738,
|
| 12 |
+
"rl1/score": 0.21535656043052673,
|
| 13 |
+
"rlpips/score": 0.5141746402549744,
|
| 14 |
+
"rpsnr/score": 12.606189897460938
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_185706.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T18:57:06",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 315.4123840332031,
|
| 9 |
+
"ginception/score": 1.8223289251327515,
|
| 10 |
+
"ginception/std": 0.43513810634613037
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_190941.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T19:09:41",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 2.729315757751465,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 2.729315757751465,
|
| 10 |
+
"rinception/score": 56.801666259765625,
|
| 11 |
+
"rinception/std": 7.076909065246582,
|
| 12 |
+
"rl1/score": 0.10776259976387025,
|
| 13 |
+
"rlpips/score": 0.20746465351104737,
|
| 14 |
+
"rpsnr/score": 18.78563266845703
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211749.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T21:17:49",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 100.38690948486328,
|
| 9 |
+
"ginception/score": 9.55769157409668,
|
| 10 |
+
"ginception/std": 0.47217872738838196
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213021.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T21:30:21",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.2609989643096924,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.2609989643096924,
|
| 10 |
+
"rinception/score": 58.3740234375,
|
| 11 |
+
"rinception/std": 7.179546356201172,
|
| 12 |
+
"rl1/score": 0.04572096435785294,
|
| 13 |
+
"rlpips/score": 0.1400455956363678,
|
| 14 |
+
"rpsnr/score": 25.163568264160155
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_233832.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:38:32",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 71.64373779296875,
|
| 9 |
+
"ginception/score": 17.276798248291016,
|
| 10 |
+
"ginception/std": 0.2812263071537018
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_235105.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:51:05",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.8307203054428101,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.8307203054428101,
|
| 10 |
+
"rinception/score": 59.03937911987305,
|
| 11 |
+
"rinception/std": 7.30922794342041,
|
| 12 |
+
"rl1/score": 0.03047736176252365,
|
| 13 |
+
"rlpips/score": 0.1192132255268097,
|
| 14 |
+
"rpsnr/score": 27.93316936279297
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_015904.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-29T01:59:04",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 59.03805160522461,
|
| 9 |
+
"ginception/score": 23.49216079711914,
|
| 10 |
+
"ginception/std": 0.4643714427947998
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_021139.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-29T02:11:39",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.6298550963401794,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.6298550963401794,
|
| 10 |
+
"rinception/score": 59.487831115722656,
|
| 11 |
+
"rinception/std": 7.161473751068115,
|
| 12 |
+
"rl1/score": 0.028371377193927766,
|
| 13 |
+
"rlpips/score": 0.11277780175209046,
|
| 14 |
+
"rpsnr/score": 28.530626791992187
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline/logs/log.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baseline/training_metrics.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baseline_dinov3_uf/config.resolved.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"seed": 42,
|
| 3 |
+
"output_dir": "work_dirs/baseline_dinov3_uf",
|
| 4 |
+
"model": {
|
| 5 |
+
"img_size": 256,
|
| 6 |
+
"input_range": "minus_one_one",
|
| 7 |
+
"num_classes": 1000,
|
| 8 |
+
"encoder_type": "dinov3",
|
| 9 |
+
"encoder_model_size": "base",
|
| 10 |
+
"encoder_patch_size": 16,
|
| 11 |
+
"freeze_encoder_backbone": false,
|
| 12 |
+
"token_channels": 128,
|
| 13 |
+
"mask_ratio": 0.0,
|
| 14 |
+
"mask_ratio_min": 0.0,
|
| 15 |
+
"mask_ratio_type": "random",
|
| 16 |
+
"use_qknorm_encoder": false,
|
| 17 |
+
"latent_hw": 16,
|
| 18 |
+
"decoder_model": "JiTCoT-B/16",
|
| 19 |
+
"decoder_patch_size": 16,
|
| 20 |
+
"bottleneck_dim_latent": 128,
|
| 21 |
+
"dh_depth": 2,
|
| 22 |
+
"dh_hidden_size": 1024,
|
| 23 |
+
"attn_dropout": 0.0,
|
| 24 |
+
"proj_dropout": 0.0,
|
| 25 |
+
"enable_ema": true,
|
| 26 |
+
"ema_decay1": 0.9999,
|
| 27 |
+
"ema_decay2": 0.9998,
|
| 28 |
+
"label_drop_prob": 0.1,
|
| 29 |
+
"P_mean": -0.4,
|
| 30 |
+
"P_std": 0.8,
|
| 31 |
+
"latent_mean": -1.2,
|
| 32 |
+
"latent_std": 1.0,
|
| 33 |
+
"latent_weight": 1.0,
|
| 34 |
+
"choose_latent_p": 0.4,
|
| 35 |
+
"perceptual_weight": 1.0,
|
| 36 |
+
"perceptual_net": "lpips-convnext_s-1.0-0.1",
|
| 37 |
+
"sample_mode": "latent_first_cascaded_noised",
|
| 38 |
+
"latent_max_t": 1.0,
|
| 39 |
+
"latent_pixel_offset": 0.0,
|
| 40 |
+
"latent_pixel_shift": 1.0,
|
| 41 |
+
"t_eps": 0.05,
|
| 42 |
+
"t_eps_inference": 0.05,
|
| 43 |
+
"noise_scale": 1.0,
|
| 44 |
+
"sampling_method": "heun",
|
| 45 |
+
"num_sampling_steps": 50,
|
| 46 |
+
"cfg": 1.0,
|
| 47 |
+
"cfg_latent": 1.0,
|
| 48 |
+
"interval_min": 0.0,
|
| 49 |
+
"interval_max": 1.0,
|
| 50 |
+
"interval_min_latent": 0.0,
|
| 51 |
+
"interval_max_latent": 1.0,
|
| 52 |
+
"gen_shift_pixel": 1.0,
|
| 53 |
+
"gen_shift_latent": 1.0,
|
| 54 |
+
"guidance_method": "cfg"
|
| 55 |
+
},
|
| 56 |
+
"data": {
|
| 57 |
+
"train_dir": "data/imagenet/train",
|
| 58 |
+
"val_dir": "data/imagenet/val",
|
| 59 |
+
"num_workers": 8,
|
| 60 |
+
"pin_memory": true,
|
| 61 |
+
"persistent_workers": true
|
| 62 |
+
},
|
| 63 |
+
"train": {
|
| 64 |
+
"epochs": 200,
|
| 65 |
+
"global_batch_size": 1024,
|
| 66 |
+
"eval_global_batch_size": 1024,
|
| 67 |
+
"grad_accum_steps": 1,
|
| 68 |
+
"grad_clip": 3.0,
|
| 69 |
+
"amp_dtype": "bf16",
|
| 70 |
+
"log_interval": 50
|
| 71 |
+
},
|
| 72 |
+
"visualization": {
|
| 73 |
+
"initial_visualization": true,
|
| 74 |
+
"vis_interval": 500,
|
| 75 |
+
"visualize_reconstruction": true,
|
| 76 |
+
"visualize_generation": true
|
| 77 |
+
},
|
| 78 |
+
"eval": {
|
| 79 |
+
"initial_eval": {
|
| 80 |
+
"reconstruction": false,
|
| 81 |
+
"generation": false
|
| 82 |
+
},
|
| 83 |
+
"gfid_interval": 10,
|
| 84 |
+
"rfid_interval": 10,
|
| 85 |
+
"gfid_stats_path": "fid_stats/jit_in256_stats.npz",
|
| 86 |
+
"rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
|
| 87 |
+
"inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
|
| 88 |
+
"gfid_backend": "online",
|
| 89 |
+
"gfid_num_classes": 1000,
|
| 90 |
+
"gfid_num_images": 50000,
|
| 91 |
+
"rfid_num_images": 50000,
|
| 92 |
+
"batch_size": 64,
|
| 93 |
+
"num_workers": 8,
|
| 94 |
+
"gfid_metric_verbose": false,
|
| 95 |
+
"gfid_keep_images": false,
|
| 96 |
+
"gfid_cfg_scale": null,
|
| 97 |
+
"gfid_cfg_scale_latent": null,
|
| 98 |
+
"gfid_cfg_interval": null,
|
| 99 |
+
"gfid_cfg_interval_latent": null,
|
| 100 |
+
"gfid_steps": null,
|
| 101 |
+
"eval_ema": "1"
|
| 102 |
+
},
|
| 103 |
+
"optim": {
|
| 104 |
+
"name": "adamw",
|
| 105 |
+
"lr": 0.0001,
|
| 106 |
+
"lr_schedule": "constant",
|
| 107 |
+
"weight_decay": 0.0,
|
| 108 |
+
"betas": [
|
| 109 |
+
0.9,
|
| 110 |
+
0.95
|
| 111 |
+
],
|
| 112 |
+
"min_lr": 1e-06,
|
| 113 |
+
"warmup_epochs": 5
|
| 114 |
+
},
|
| 115 |
+
"checkpoint": {
|
| 116 |
+
"resume": "",
|
| 117 |
+
"auto_resume": true,
|
| 118 |
+
"save_interval": 1,
|
| 119 |
+
"keep_last": 3
|
| 120 |
+
},
|
| 121 |
+
"logging": {
|
| 122 |
+
"enable_wandb": false,
|
| 123 |
+
"entity": "",
|
| 124 |
+
"project": "diffusion-decoder",
|
| 125 |
+
"run_name": "diffusion_decoder_imagenet256"
|
| 126 |
+
}
|
| 127 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0001_epoch_0010_generation_step_00012510_20260528_150031.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T15:00:31",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 318.4571228027344,
|
| 9 |
+
"ginception/score": 1.1044092178344727,
|
| 10 |
+
"ginception/std": 0.051184553653001785
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0002_epoch_0010_reconstruction_step_00012510_20260528_151306.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T15:13:06",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 296.68890380859375,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 296.68890380859375,
|
| 10 |
+
"rinception/score": 1.2041164636611938,
|
| 11 |
+
"rinception/std": 0.06055246293544769,
|
| 12 |
+
"rl1/score": 0.2864151106643677,
|
| 13 |
+
"rlpips/score": 0.7054891017150879,
|
| 14 |
+
"rpsnr/score": 9.468088702392578
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0003_epoch_0020_generation_step_00025020_20260528_170639.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T17:06:39",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 377.2621765136719,
|
| 9 |
+
"ginception/score": 1.0861103534698486,
|
| 10 |
+
"ginception/std": 0.06107068806886673
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0004_epoch_0020_reconstruction_step_00025020_20260528_171913.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T17:19:13",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 33.72285842895508,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 33.72285842895508,
|
| 10 |
+
"rinception/score": 29.16294288635254,
|
| 11 |
+
"rinception/std": 4.797229766845703,
|
| 12 |
+
"rl1/score": 0.18163616892814635,
|
| 13 |
+
"rlpips/score": 0.4273777462387085,
|
| 14 |
+
"rpsnr/score": 13.872778917236328
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0005_epoch_0030_generation_step_00037530_20260528_191303.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T19:13:03",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 118.85285186767578,
|
| 9 |
+
"ginception/score": 6.589343070983887,
|
| 10 |
+
"ginception/std": 0.2932935953140259
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0006_epoch_0030_reconstruction_step_00037530_20260528_192539.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T19:25:39",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 2.975400686264038,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 2.975400686264038,
|
| 10 |
+
"rinception/score": 55.23002243041992,
|
| 11 |
+
"rinception/std": 7.260276794433594,
|
| 12 |
+
"rl1/score": 0.06078192769527435,
|
| 13 |
+
"rlpips/score": 0.208553619556427,
|
| 14 |
+
"rpsnr/score": 21.92944625
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0007_epoch_0040_generation_step_00050040_20260528_211926.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T21:19:26",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 90.12359619140625,
|
| 9 |
+
"ginception/score": 12.53602123260498,
|
| 10 |
+
"ginception/std": 0.1810392588376999
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0008_epoch_0040_reconstruction_step_00050040_20260528_213159.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T21:31:59",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.3650418519973755,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.3650418519973755,
|
| 10 |
+
"rinception/score": 57.91691970825195,
|
| 11 |
+
"rinception/std": 7.2867231369018555,
|
| 12 |
+
"rl1/score": 0.047244364943504334,
|
| 13 |
+
"rlpips/score": 0.16791890050888061,
|
| 14 |
+
"rpsnr/score": 24.01355887939453
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0009_epoch_0050_generation_step_00062550_20260528_232551.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:25:51",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 69.93110656738281,
|
| 9 |
+
"ginception/score": 18.20962905883789,
|
| 10 |
+
"ginception/std": 0.366786926984787
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0010_epoch_0050_reconstruction_step_00062550_20260528_233826.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:38:26",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.8765761852264404,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.8765761852264404,
|
| 10 |
+
"rinception/score": 58.834983825683594,
|
| 11 |
+
"rinception/std": 7.452160835266113,
|
| 12 |
+
"rl1/score": 0.038886987855434416,
|
| 13 |
+
"rlpips/score": 0.15030842846870424,
|
| 14 |
+
"rpsnr/score": 25.484861188964842
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0011_epoch_0060_generation_step_00075060_20260529_013229.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-29T01:32:29",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_generation",
|
| 6 |
+
"stats": {
|
| 7 |
+
"gfid/num_images": 50000.0,
|
| 8 |
+
"gfid/score": 61.83375549316406,
|
| 9 |
+
"ginception/score": 21.42015266418457,
|
| 10 |
+
"ginception/std": 0.40415942668914795
|
| 11 |
+
}
|
| 12 |
+
}
|
baseline_dinov3_uf/eval_results/eval_0012_epoch_0060_reconstruction_step_00075060_20260529_014506.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-29T01:45:06",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.6807690858840942,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.6807690858840942,
|
| 10 |
+
"rinception/score": 59.302391052246094,
|
| 11 |
+
"rinception/std": 7.438356876373291,
|
| 12 |
+
"rl1/score": 0.03433100723981857,
|
| 13 |
+
"rlpips/score": 0.1406316768550873,
|
| 14 |
+
"rpsnr/score": 26.435069028320314
|
| 15 |
+
}
|
| 16 |
+
}
|
baseline_dinov3_uf/logs/log.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baseline_dinov3_uf/training_metrics.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
rec_only_dinov3/config.resolved.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"seed": 42,
|
| 3 |
+
"output_dir": "work_dirs/rec_only_dinov3",
|
| 4 |
+
"model": {
|
| 5 |
+
"img_size": 256,
|
| 6 |
+
"input_range": "minus_one_one",
|
| 7 |
+
"num_classes": 1000,
|
| 8 |
+
"encoder_type": "dinov3",
|
| 9 |
+
"encoder_model_size": "base",
|
| 10 |
+
"encoder_patch_size": 16,
|
| 11 |
+
"freeze_encoder_backbone": true,
|
| 12 |
+
"token_channels": 128,
|
| 13 |
+
"mask_ratio": 0.0,
|
| 14 |
+
"mask_ratio_min": 0.0,
|
| 15 |
+
"mask_ratio_type": "random",
|
| 16 |
+
"use_qknorm_encoder": false,
|
| 17 |
+
"latent_hw": 16,
|
| 18 |
+
"decoder_model": "JiTCoT-B/16",
|
| 19 |
+
"decoder_patch_size": 16,
|
| 20 |
+
"bottleneck_dim_latent": 128,
|
| 21 |
+
"dh_depth": 2,
|
| 22 |
+
"dh_hidden_size": 1024,
|
| 23 |
+
"attn_dropout": 0.0,
|
| 24 |
+
"proj_dropout": 0.0,
|
| 25 |
+
"enable_ema": true,
|
| 26 |
+
"ema_decay1": 0.9999,
|
| 27 |
+
"ema_decay2": 0.9998,
|
| 28 |
+
"label_drop_prob": 0.1,
|
| 29 |
+
"P_mean": -0.4,
|
| 30 |
+
"P_std": 0.8,
|
| 31 |
+
"latent_mean": -1.2,
|
| 32 |
+
"latent_std": 1.0,
|
| 33 |
+
"latent_weight": 0.0,
|
| 34 |
+
"perceptual_weight": 1.0,
|
| 35 |
+
"perceptual_net": "lpips-convnext_s-1.0-0.1",
|
| 36 |
+
"choose_latent_p": 0.0,
|
| 37 |
+
"sample_mode": "reconstruction_only",
|
| 38 |
+
"latent_max_t": 1.0,
|
| 39 |
+
"latent_pixel_offset": 0.0,
|
| 40 |
+
"latent_pixel_shift": 1.0,
|
| 41 |
+
"t_eps": 0.05,
|
| 42 |
+
"t_eps_inference": 0.05,
|
| 43 |
+
"noise_scale": 1.0,
|
| 44 |
+
"sampling_method": "heun",
|
| 45 |
+
"num_sampling_steps": 50,
|
| 46 |
+
"cfg": 1.0,
|
| 47 |
+
"cfg_latent": 1.0,
|
| 48 |
+
"interval_min": 0.0,
|
| 49 |
+
"interval_max": 1.0,
|
| 50 |
+
"interval_min_latent": 0.0,
|
| 51 |
+
"interval_max_latent": 1.0,
|
| 52 |
+
"gen_shift_pixel": 1.0,
|
| 53 |
+
"gen_shift_latent": 1.0,
|
| 54 |
+
"guidance_method": "cfg"
|
| 55 |
+
},
|
| 56 |
+
"data": {
|
| 57 |
+
"train_dir": "data/imagenet/train",
|
| 58 |
+
"val_dir": "data/imagenet/val",
|
| 59 |
+
"num_workers": 8,
|
| 60 |
+
"pin_memory": true,
|
| 61 |
+
"persistent_workers": true
|
| 62 |
+
},
|
| 63 |
+
"train": {
|
| 64 |
+
"epochs": 200,
|
| 65 |
+
"global_batch_size": 1024,
|
| 66 |
+
"eval_global_batch_size": 1024,
|
| 67 |
+
"grad_accum_steps": 1,
|
| 68 |
+
"grad_clip": 3.0,
|
| 69 |
+
"amp_dtype": "bf16",
|
| 70 |
+
"log_interval": 50
|
| 71 |
+
},
|
| 72 |
+
"visualization": {
|
| 73 |
+
"initial_visualization": true,
|
| 74 |
+
"vis_interval": 500,
|
| 75 |
+
"visualize_reconstruction": true,
|
| 76 |
+
"visualize_generation": false
|
| 77 |
+
},
|
| 78 |
+
"eval": {
|
| 79 |
+
"initial_eval": {
|
| 80 |
+
"reconstruction": false,
|
| 81 |
+
"generation": false
|
| 82 |
+
},
|
| 83 |
+
"gfid_interval": 0,
|
| 84 |
+
"rfid_interval": 10,
|
| 85 |
+
"gfid_stats_path": "",
|
| 86 |
+
"rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
|
| 87 |
+
"inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
|
| 88 |
+
"gfid_backend": "online",
|
| 89 |
+
"gfid_num_classes": 1000,
|
| 90 |
+
"gfid_num_images": 50000,
|
| 91 |
+
"rfid_num_images": 50000,
|
| 92 |
+
"batch_size": 128,
|
| 93 |
+
"num_workers": 8,
|
| 94 |
+
"gfid_metric_verbose": false,
|
| 95 |
+
"gfid_keep_images": false,
|
| 96 |
+
"gfid_cfg_scale": null,
|
| 97 |
+
"gfid_cfg_scale_latent": null,
|
| 98 |
+
"gfid_cfg_interval": null,
|
| 99 |
+
"gfid_cfg_interval_latent": null,
|
| 100 |
+
"gfid_steps": null,
|
| 101 |
+
"eval_ema": "1"
|
| 102 |
+
},
|
| 103 |
+
"optim": {
|
| 104 |
+
"name": "adamw",
|
| 105 |
+
"lr": 0.0001,
|
| 106 |
+
"lr_schedule": "constant",
|
| 107 |
+
"weight_decay": 0.0,
|
| 108 |
+
"betas": [
|
| 109 |
+
0.9,
|
| 110 |
+
0.95
|
| 111 |
+
],
|
| 112 |
+
"min_lr": 1e-06,
|
| 113 |
+
"warmup_epochs": 5
|
| 114 |
+
},
|
| 115 |
+
"checkpoint": {
|
| 116 |
+
"resume": "",
|
| 117 |
+
"auto_resume": true,
|
| 118 |
+
"save_interval": 1,
|
| 119 |
+
"keep_last": 3
|
| 120 |
+
},
|
| 121 |
+
"logging": {
|
| 122 |
+
"enable_wandb": false,
|
| 123 |
+
"entity": "",
|
| 124 |
+
"project": "diffusion-decoder",
|
| 125 |
+
"run_name": "jitcot_rec_only_vgg_convnext_imagenet256"
|
| 126 |
+
}
|
| 127 |
+
}
|
rec_only_dinov3/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_110622.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T11:06:22",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 299.427001953125,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 299.427001953125,
|
| 10 |
+
"rinception/score": 1.1586003303527832,
|
| 11 |
+
"rinception/std": 0.14029201865196228,
|
| 12 |
+
"rl1/score": 0.3669631064605713,
|
| 13 |
+
"rlpips/score": 0.799619822769165,
|
| 14 |
+
"rpsnr/score": 7.547315838012695
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_124235.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T12:42:35",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 22.071176528930664,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 22.071176528930664,
|
| 10 |
+
"rinception/score": 43.34564971923828,
|
| 11 |
+
"rinception/std": 3.7558352947235107,
|
| 12 |
+
"rl1/score": 0.2071618295097351,
|
| 13 |
+
"rlpips/score": 0.5011178592300415,
|
| 14 |
+
"rpsnr/score": 12.862273885498047
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_141847.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T14:18:47",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 9.413352012634277,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 9.413352012634277,
|
| 10 |
+
"rinception/score": 53.314453125,
|
| 11 |
+
"rinception/std": 6.155331134796143,
|
| 12 |
+
"rl1/score": 0.18956555074691772,
|
| 13 |
+
"rlpips/score": 0.45460860481262205,
|
| 14 |
+
"rpsnr/score": 13.669592930908204
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_155459.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T15:54:59",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 2.9257969856262207,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 2.9257969856262207,
|
| 10 |
+
"rinception/score": 57.289398193359375,
|
| 11 |
+
"rinception/std": 7.0953593254089355,
|
| 12 |
+
"rl1/score": 0.1296201045036316,
|
| 13 |
+
"rlpips/score": 0.3560291623687744,
|
| 14 |
+
"rpsnr/score": 16.702779267578126
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_173111.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T17:31:11",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.5682669878005981,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.5682669878005981,
|
| 10 |
+
"rinception/score": 58.81327438354492,
|
| 11 |
+
"rinception/std": 7.187229633331299,
|
| 12 |
+
"rl1/score": 0.10405946157455444,
|
| 13 |
+
"rlpips/score": 0.31758854347229004,
|
| 14 |
+
"rpsnr/score": 17.98356410522461
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_190723.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T19:07:23",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.3132721185684204,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.3132721185684204,
|
| 10 |
+
"rinception/score": 59.43439483642578,
|
| 11 |
+
"rinception/std": 7.511739253997803,
|
| 12 |
+
"rl1/score": 0.09370932280540466,
|
| 13 |
+
"rlpips/score": 0.2997593156814575,
|
| 14 |
+
"rpsnr/score": 18.51156289794922
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0007_epoch_0070_reconstruction_step_00087570_20260528_204340.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T20:43:40",
|
| 3 |
+
"epoch": 70,
|
| 4 |
+
"global_step": 87570,
|
| 5 |
+
"name": "epoch_0070_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.2280447483062744,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.2280447483062744,
|
| 10 |
+
"rinception/score": 59.451393127441406,
|
| 11 |
+
"rinception/std": 7.455377578735352,
|
| 12 |
+
"rl1/score": 0.09087045845985413,
|
| 13 |
+
"rlpips/score": 0.2918792335700989,
|
| 14 |
+
"rpsnr/score": 18.59110985961914
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0008_epoch_0080_reconstruction_step_00100080_20260528_221953.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T22:19:53",
|
| 3 |
+
"epoch": 80,
|
| 4 |
+
"global_step": 100080,
|
| 5 |
+
"name": "epoch_0080_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.2123676538467407,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.2123676538467407,
|
| 10 |
+
"rinception/score": 59.36212921142578,
|
| 11 |
+
"rinception/std": 7.355563163757324,
|
| 12 |
+
"rl1/score": 0.08963097463607789,
|
| 13 |
+
"rlpips/score": 0.28763298891067507,
|
| 14 |
+
"rpsnr/score": 18.55867121826172
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0009_epoch_0090_reconstruction_step_00112590_20260528_235612.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:56:12",
|
| 3 |
+
"epoch": 90,
|
| 4 |
+
"global_step": 112590,
|
| 5 |
+
"name": "epoch_0090_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.285521388053894,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.285521388053894,
|
| 10 |
+
"rinception/score": 59.30804443359375,
|
| 11 |
+
"rinception/std": 7.406274318695068,
|
| 12 |
+
"rl1/score": 0.09017072152137756,
|
| 13 |
+
"rlpips/score": 0.2866918243408203,
|
| 14 |
+
"rpsnr/score": 18.41382909301758
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/eval_results/eval_0010_epoch_0100_reconstruction_step_00125100_20260529_013235.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-29T01:32:35",
|
| 3 |
+
"epoch": 100,
|
| 4 |
+
"global_step": 125100,
|
| 5 |
+
"name": "epoch_0100_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 1.1464312076568604,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 1.1464312076568604,
|
| 10 |
+
"rinception/score": 59.60105514526367,
|
| 11 |
+
"rinception/std": 7.36441707611084,
|
| 12 |
+
"rl1/score": 0.08871454634666442,
|
| 13 |
+
"rlpips/score": 0.2819660251617432,
|
| 14 |
+
"rpsnr/score": 18.57816102661133
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3/logs/log.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
rec_only_dinov3/training_metrics.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
rec_only_dinov3_uf/config.resolved.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"seed": 42,
|
| 3 |
+
"output_dir": "work_dirs/rec_only_dinov3_uf",
|
| 4 |
+
"model": {
|
| 5 |
+
"img_size": 256,
|
| 6 |
+
"input_range": "minus_one_one",
|
| 7 |
+
"num_classes": 1000,
|
| 8 |
+
"encoder_type": "dinov3",
|
| 9 |
+
"encoder_model_size": "base",
|
| 10 |
+
"encoder_patch_size": 16,
|
| 11 |
+
"freeze_encoder_backbone": false,
|
| 12 |
+
"token_channels": 128,
|
| 13 |
+
"mask_ratio": 0.0,
|
| 14 |
+
"mask_ratio_min": 0.0,
|
| 15 |
+
"mask_ratio_type": "random",
|
| 16 |
+
"use_qknorm_encoder": false,
|
| 17 |
+
"latent_hw": 16,
|
| 18 |
+
"decoder_model": "JiTCoT-B/16",
|
| 19 |
+
"decoder_patch_size": 16,
|
| 20 |
+
"bottleneck_dim_latent": 128,
|
| 21 |
+
"dh_depth": 2,
|
| 22 |
+
"dh_hidden_size": 1024,
|
| 23 |
+
"attn_dropout": 0.0,
|
| 24 |
+
"proj_dropout": 0.0,
|
| 25 |
+
"enable_ema": true,
|
| 26 |
+
"ema_decay1": 0.9999,
|
| 27 |
+
"ema_decay2": 0.9998,
|
| 28 |
+
"label_drop_prob": 0.1,
|
| 29 |
+
"P_mean": -0.4,
|
| 30 |
+
"P_std": 0.8,
|
| 31 |
+
"latent_mean": -1.2,
|
| 32 |
+
"latent_std": 1.0,
|
| 33 |
+
"latent_weight": 0.0,
|
| 34 |
+
"perceptual_weight": 1.0,
|
| 35 |
+
"perceptual_net": "lpips-convnext_s-1.0-0.1",
|
| 36 |
+
"choose_latent_p": 0.0,
|
| 37 |
+
"sample_mode": "reconstruction_only",
|
| 38 |
+
"latent_max_t": 1.0,
|
| 39 |
+
"latent_pixel_offset": 0.0,
|
| 40 |
+
"latent_pixel_shift": 1.0,
|
| 41 |
+
"t_eps": 0.05,
|
| 42 |
+
"t_eps_inference": 0.05,
|
| 43 |
+
"noise_scale": 1.0,
|
| 44 |
+
"sampling_method": "heun",
|
| 45 |
+
"num_sampling_steps": 50,
|
| 46 |
+
"cfg": 1.0,
|
| 47 |
+
"cfg_latent": 1.0,
|
| 48 |
+
"interval_min": 0.0,
|
| 49 |
+
"interval_max": 1.0,
|
| 50 |
+
"interval_min_latent": 0.0,
|
| 51 |
+
"interval_max_latent": 1.0,
|
| 52 |
+
"gen_shift_pixel": 1.0,
|
| 53 |
+
"gen_shift_latent": 1.0,
|
| 54 |
+
"guidance_method": "cfg"
|
| 55 |
+
},
|
| 56 |
+
"data": {
|
| 57 |
+
"train_dir": "data/imagenet/train",
|
| 58 |
+
"val_dir": "data/imagenet/val",
|
| 59 |
+
"num_workers": 8,
|
| 60 |
+
"pin_memory": true,
|
| 61 |
+
"persistent_workers": true
|
| 62 |
+
},
|
| 63 |
+
"train": {
|
| 64 |
+
"epochs": 200,
|
| 65 |
+
"global_batch_size": 1024,
|
| 66 |
+
"eval_global_batch_size": 1024,
|
| 67 |
+
"grad_accum_steps": 1,
|
| 68 |
+
"grad_clip": 3.0,
|
| 69 |
+
"amp_dtype": "bf16",
|
| 70 |
+
"log_interval": 50
|
| 71 |
+
},
|
| 72 |
+
"visualization": {
|
| 73 |
+
"initial_visualization": true,
|
| 74 |
+
"vis_interval": 500,
|
| 75 |
+
"visualize_reconstruction": true,
|
| 76 |
+
"visualize_generation": false
|
| 77 |
+
},
|
| 78 |
+
"eval": {
|
| 79 |
+
"initial_eval": {
|
| 80 |
+
"reconstruction": false,
|
| 81 |
+
"generation": false
|
| 82 |
+
},
|
| 83 |
+
"gfid_interval": 0,
|
| 84 |
+
"rfid_interval": 10,
|
| 85 |
+
"gfid_stats_path": "",
|
| 86 |
+
"rfid_stats_path": "fid_stats/val_fid_statistics_file_256.npz",
|
| 87 |
+
"inception_weights": "fid_stats/weights-inception-2015-12-05-6726825d.pth",
|
| 88 |
+
"gfid_backend": "online",
|
| 89 |
+
"gfid_num_classes": 1000,
|
| 90 |
+
"gfid_num_images": 50000,
|
| 91 |
+
"rfid_num_images": 50000,
|
| 92 |
+
"batch_size": 128,
|
| 93 |
+
"num_workers": 8,
|
| 94 |
+
"gfid_metric_verbose": false,
|
| 95 |
+
"gfid_keep_images": false,
|
| 96 |
+
"gfid_cfg_scale": null,
|
| 97 |
+
"gfid_cfg_scale_latent": null,
|
| 98 |
+
"gfid_cfg_interval": null,
|
| 99 |
+
"gfid_cfg_interval_latent": null,
|
| 100 |
+
"gfid_steps": null,
|
| 101 |
+
"eval_ema": "1"
|
| 102 |
+
},
|
| 103 |
+
"optim": {
|
| 104 |
+
"name": "adamw",
|
| 105 |
+
"lr": 0.0001,
|
| 106 |
+
"lr_schedule": "constant",
|
| 107 |
+
"weight_decay": 0.0,
|
| 108 |
+
"betas": [
|
| 109 |
+
0.9,
|
| 110 |
+
0.95
|
| 111 |
+
],
|
| 112 |
+
"min_lr": 1e-06,
|
| 113 |
+
"warmup_epochs": 5
|
| 114 |
+
},
|
| 115 |
+
"checkpoint": {
|
| 116 |
+
"resume": "",
|
| 117 |
+
"auto_resume": true,
|
| 118 |
+
"save_interval": 1,
|
| 119 |
+
"keep_last": 3
|
| 120 |
+
},
|
| 121 |
+
"logging": {
|
| 122 |
+
"enable_wandb": false,
|
| 123 |
+
"entity": "",
|
| 124 |
+
"project": "diffusion-decoder",
|
| 125 |
+
"run_name": "jitcot_rec_only_vgg_convnext_imagenet256"
|
| 126 |
+
}
|
| 127 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0001_epoch_0010_reconstruction_step_00012510_20260528_134902.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T13:49:02",
|
| 3 |
+
"epoch": 10,
|
| 4 |
+
"global_step": 12510,
|
| 5 |
+
"name": "epoch_0010_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 143.6580352783203,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 143.6580352783203,
|
| 10 |
+
"rinception/score": 8.937095642089844,
|
| 11 |
+
"rinception/std": 1.9498242139816284,
|
| 12 |
+
"rl1/score": 0.12238822456359863,
|
| 13 |
+
"rlpips/score": 0.5494522420501708,
|
| 14 |
+
"rpsnr/score": 16.949355013427734
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0002_epoch_0020_reconstruction_step_00025020_20260528_153954.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T15:39:54",
|
| 3 |
+
"epoch": 20,
|
| 4 |
+
"global_step": 25020,
|
| 5 |
+
"name": "epoch_0020_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 5.159643173217773,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 5.159643173217773,
|
| 10 |
+
"rinception/score": 56.56890869140625,
|
| 11 |
+
"rinception/std": 6.62244987487793,
|
| 12 |
+
"rl1/score": 0.04988961963653565,
|
| 13 |
+
"rlpips/score": 0.21052068838119506,
|
| 14 |
+
"rpsnr/score": 24.29544946533203
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0003_epoch_0030_reconstruction_step_00037530_20260528_173108.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T17:31:08",
|
| 3 |
+
"epoch": 30,
|
| 4 |
+
"global_step": 37530,
|
| 5 |
+
"name": "epoch_0030_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.9540103673934937,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.9540103673934937,
|
| 10 |
+
"rinception/score": 60.06682205200195,
|
| 11 |
+
"rinception/std": 7.526337146759033,
|
| 12 |
+
"rl1/score": 0.029459762790203094,
|
| 13 |
+
"rlpips/score": 0.11677609906196594,
|
| 14 |
+
"rpsnr/score": 28.49137232421875
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0004_epoch_0040_reconstruction_step_00050040_20260528_192208.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T19:22:08",
|
| 3 |
+
"epoch": 40,
|
| 4 |
+
"global_step": 50040,
|
| 5 |
+
"name": "epoch_0040_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.450069397687912,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.450069397687912,
|
| 10 |
+
"rinception/score": 60.43627166748047,
|
| 11 |
+
"rinception/std": 7.46042013168335,
|
| 12 |
+
"rl1/score": 0.0233486088180542,
|
| 13 |
+
"rlpips/score": 0.09246708666801452,
|
| 14 |
+
"rpsnr/score": 30.35999076904297
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0005_epoch_0050_reconstruction_step_00062550_20260528_211317.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T21:13:17",
|
| 3 |
+
"epoch": 50,
|
| 4 |
+
"global_step": 62550,
|
| 5 |
+
"name": "epoch_0050_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.32373496890068054,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.32373496890068054,
|
| 10 |
+
"rinception/score": 60.77216339111328,
|
| 11 |
+
"rinception/std": 7.522408962249756,
|
| 12 |
+
"rl1/score": 0.02103063518643379,
|
| 13 |
+
"rlpips/score": 0.08168746644496917,
|
| 14 |
+
"rpsnr/score": 31.230824963378907
|
| 15 |
+
}
|
| 16 |
+
}
|
rec_only_dinov3_uf/eval_results/eval_0006_epoch_0060_reconstruction_step_00075060_20260528_230434.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-28T23:04:34",
|
| 3 |
+
"epoch": 60,
|
| 4 |
+
"global_step": 75060,
|
| 5 |
+
"name": "epoch_0060_reconstruction",
|
| 6 |
+
"stats": {
|
| 7 |
+
"rfid-val/score": 0.2859066128730774,
|
| 8 |
+
"rfid/num_images": 50000.0,
|
| 9 |
+
"rfid/score": 0.2859066128730774,
|
| 10 |
+
"rinception/score": 60.9921875,
|
| 11 |
+
"rinception/std": 7.5456461906433105,
|
| 12 |
+
"rl1/score": 0.02010475501537323,
|
| 13 |
+
"rlpips/score": 0.07721434003353118,
|
| 14 |
+
"rpsnr/score": 31.611067966308593
|
| 15 |
+
}
|
| 16 |
+
}
|