wlsaidhi committed on
Commit
bd0e8a2
·
verified ·
1 Parent(s): 37d90cd

Initial conversion from stabilityai/stable-audio-open-small via scripts/checkpoint_conversion/stable_audio_to_diffusers.py

Browse files
conditioner/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioMultiConditioner",
3
+ "cond_dim": 768,
4
+ "configs": [
5
+ {
6
+ "id": "prompt",
7
+ "type": "t5",
8
+ "config": {
9
+ "t5_model_name": "t5-base",
10
+ "max_length": 64
11
+ }
12
+ },
13
+ {
14
+ "id": "seconds_total",
15
+ "type": "number",
16
+ "config": {
17
+ "min_val": 0,
18
+ "max_val": 256
19
+ }
20
+ }
21
+ ]
22
+ }
conditioner/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2433cfd09378c04b1f9208cdb181f731ff245303d5ac61de7cfe096f79274d
3
+ size 793440
model_index.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioPipeline",
3
+ "_fastvideo_converted_from": "stabilityai/stable-audio-open-small",
4
+ "transformer": [
5
+ "fastvideo.models.dits.stable_audio",
6
+ "StableAudioDiT"
7
+ ],
8
+ "vae": [
9
+ "fastvideo.models.vaes.oobleck",
10
+ "OobleckVAE"
11
+ ],
12
+ "conditioner": [
13
+ "fastvideo.models.encoders.stable_audio_conditioner",
14
+ "StableAudioMultiConditioner"
15
+ ]
16
+ }
transformer/config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioDiT",
3
+ "io_channels": 64,
4
+ "embed_dim": 1024,
5
+ "depth": 16,
6
+ "num_heads": 8,
7
+ "cond_token_dim": 768,
8
+ "global_cond_dim": 768,
9
+ "transformer_type": "continuous_transformer",
10
+ "attn_kwargs": {
11
+ "qk_norm": "ln"
12
+ },
13
+ "cross_attention_cond_ids": [
14
+ "prompt",
15
+ "seconds_total"
16
+ ],
17
+ "global_cond_ids": [
18
+ "seconds_total"
19
+ ]
20
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab05de76714f37cd664d653094ce4b985deddfb6916b76090aa18a030f2829fb
3
+ size 1363757944
vae/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "OobleckVAE",
3
+ "io_channels": 2,
4
+ "latent_dim": 64,
5
+ "downsampling_ratio": 2048,
6
+ "encoder_config": {
7
+ "in_channels": 2,
8
+ "channels": 128,
9
+ "c_mults": [
10
+ 1,
11
+ 2,
12
+ 4,
13
+ 8,
14
+ 16
15
+ ],
16
+ "strides": [
17
+ 2,
18
+ 4,
19
+ 4,
20
+ 8,
21
+ 8
22
+ ],
23
+ "latent_dim": 128,
24
+ "use_snake": true
25
+ },
26
+ "decoder_config": {
27
+ "out_channels": 2,
28
+ "channels": 128,
29
+ "c_mults": [
30
+ 1,
31
+ 2,
32
+ 4,
33
+ 8,
34
+ 16
35
+ ],
36
+ "strides": [
37
+ 2,
38
+ 4,
39
+ 4,
40
+ 8,
41
+ 8
42
+ ],
43
+ "latent_dim": 64,
44
+ "use_snake": true,
45
+ "final_tanh": false
46
+ }
47
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fca83fabbdd3cb7dc97b83e2c43aedc5d64ddb00da8771e3af6c86e19e07249
3
+ size 312265524