Upload folder using huggingface_hub
Browse files
pretrain_imgnet_B_1024d16l.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_config": {
|
| 3 |
+
"image_dim": 4,
|
| 4 |
+
"patch_size": 2,
|
| 5 |
+
"decoder_style": "dit",
|
| 6 |
+
"num_layers": 16,
|
| 7 |
+
"hidden_size": 1024,
|
| 8 |
+
"intermediate_size": 4096,
|
| 9 |
+
"num_heads": 16,
|
| 10 |
+
"decode_mode": "diffusion"
|
| 11 |
+
},
|
| 12 |
+
"encoder_hidden_size": 768,
|
| 13 |
+
"latent_h": 32,
|
| 14 |
+
"latent_w": 32,
|
| 15 |
+
"latent_mean": [
|
| 16 |
+
-0.69,
|
| 17 |
+
-0.48,
|
| 18 |
+
-0.6,
|
| 19 |
+
0.28
|
| 20 |
+
],
|
| 21 |
+
"latent_std": [
|
| 22 |
+
12.38,
|
| 23 |
+
11.22,
|
| 24 |
+
7.93,
|
| 25 |
+
21.22
|
| 26 |
+
],
|
| 27 |
+
"pretrain_mode": "imagenet",
|
| 28 |
+
"source_ckpt": "ttvidt-dit-pretrain/tpigie65/checkpoints/epoch=19-step=100000.ckpt"
|
| 29 |
+
}
|
pretrain_imgnet_B_1024d16l.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71d51a561eafa5aeaf7f51a45594a09399c0ce5efadfdb1a79668235a736bc98
|
| 3 |
+
size 1553397056
|
pretrain_imgnet_S_768d12l.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_config": {
|
| 3 |
+
"image_dim": 4,
|
| 4 |
+
"patch_size": 2,
|
| 5 |
+
"decoder_style": "dit",
|
| 6 |
+
"num_layers": 12,
|
| 7 |
+
"hidden_size": 768,
|
| 8 |
+
"intermediate_size": 3072,
|
| 9 |
+
"num_heads": 12,
|
| 10 |
+
"decode_mode": "diffusion"
|
| 11 |
+
},
|
| 12 |
+
"encoder_hidden_size": 768,
|
| 13 |
+
"latent_h": 32,
|
| 14 |
+
"latent_w": 32,
|
| 15 |
+
"latent_mean": [
|
| 16 |
+
-0.69,
|
| 17 |
+
-0.48,
|
| 18 |
+
-0.6,
|
| 19 |
+
0.28
|
| 20 |
+
],
|
| 21 |
+
"latent_std": [
|
| 22 |
+
12.38,
|
| 23 |
+
11.22,
|
| 24 |
+
7.93,
|
| 25 |
+
21.22
|
| 26 |
+
],
|
| 27 |
+
"pretrain_mode": "imagenet",
|
| 28 |
+
"source_ckpt": "ttvidt-dit-pretrain/hpdabz9q/checkpoints/epoch=19-step=100000.ckpt"
|
| 29 |
+
}
|
pretrain_imgnet_S_768d12l.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5160b2f96b77671691199f288cba00372d908c48b09e70fe14e8f2f76cbe3b5d
|
| 3 |
+
size 656766368
|
pretrain_video_S_768d12l.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_config": {
|
| 3 |
+
"image_dim": 4,
|
| 4 |
+
"patch_size": 2,
|
| 5 |
+
"decoder_style": "dit",
|
| 6 |
+
"num_layers": 12,
|
| 7 |
+
"hidden_size": 768,
|
| 8 |
+
"intermediate_size": 3072,
|
| 9 |
+
"num_heads": 12,
|
| 10 |
+
"decode_mode": "diffusion"
|
| 11 |
+
},
|
| 12 |
+
"encoder_hidden_size": 768,
|
| 13 |
+
"latent_h": 32,
|
| 14 |
+
"latent_w": 32,
|
| 15 |
+
"latent_mean": [
|
| 16 |
+
-0.69,
|
| 17 |
+
-0.48,
|
| 18 |
+
-0.6,
|
| 19 |
+
0.28
|
| 20 |
+
],
|
| 21 |
+
"latent_std": [
|
| 22 |
+
12.38,
|
| 23 |
+
11.22,
|
| 24 |
+
7.93,
|
| 25 |
+
21.22
|
| 26 |
+
],
|
| 27 |
+
"pretrain_mode": "video",
|
| 28 |
+
"source_ckpt": "ttvidt-dit-pretrain/2u6ev4md/checkpoints/epoch=14-step=100000.ckpt"
|
| 29 |
+
}
|
pretrain_video_S_768d12l.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cea31019a66e765a279125bf365838b83397fa2bd445846e2dd2b0a263205a29
|
| 3 |
+
size 656766368
|