File size: 1,450 Bytes
5b40df0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7475c7f
5b40df0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
  "_class_name": "AutoencoderKLPyraTok",
  "_diffusers_version": "0.37.0.dev0",
  "_name_or_path": "/data/onkar/models_pyratok/vae",
  "attn_scales": [],
  "base_dim": 96,
  "decoder_base_dim": null,
  "dim_mult": [
    1,
    2,
    4,
    4
  ],
  "dropout": 0.0,
  "in_channels": 3,
  "is_residual": false,
  "lapq_codebook_dim": 16,
  "lapq_commitment_weight": 0.5,
  "lapq_entropy_weight": 0.5,
  "lapq_inv_temperature": 100.0,
  "lapq_num_codes": 65536,
  "lapq_num_quantizers": 4,
  "lapq_quantize_dropout": false,
  "lapq_quantize_dropout_cutoff_index": 0,
  "lapq_quantize_dropout_multiple_of": 1,
  "lapq_text_condition_heads": 2,
  "lapq_text_condition_scale": 0.7,
  "lapq_text_embed_dim": 256,
  "lapq_text_input_dim": 2560,
  "lapq_text_mlp_hidden_dim": 1024,
  "latents_mean": [
    -0.7571,
    -0.7089,
    -0.9113,
    0.1075,
    -0.1745,
    0.9653,
    -0.1517,
    1.5508,
    0.4134,
    -0.0715,
    0.5517,
    -0.3632,
    -0.1922,
    -0.9497,
    0.2503,
    -0.2921
  ],
  "latents_std": [
    2.8184,
    1.4541,
    2.3275,
    2.6558,
    1.2196,
    1.7708,
    2.6052,
    2.0743,
    3.2687,
    2.1526,
    2.8652,
    1.5579,
    1.6382,
    1.1253,
    2.8251,
    1.916
  ],
  "num_res_blocks": 2,
  "out_channels": 3,
  "patch_size": null,
  "scale_factor_spatial": 8,
  "scale_factor_temporal": 4,
  "temperal_downsample": [
    false,
    true,
    true
  ],
  "use_lapq_quantizer": false,
  "z_dim": 16
}