PyraTok / vae /config.json
onkarsus13's picture
Update vae/config.json
7475c7f verified
{
"_class_name": "AutoencoderKLPyraTok",
"_diffusers_version": "0.37.0.dev0",
"_name_or_path": "/data/onkar/models_pyratok/vae",
"attn_scales": [],
"base_dim": 96,
"decoder_base_dim": null,
"dim_mult": [
1,
2,
4,
4
],
"dropout": 0.0,
"in_channels": 3,
"is_residual": false,
"lapq_codebook_dim": 16,
"lapq_commitment_weight": 0.5,
"lapq_entropy_weight": 0.5,
"lapq_inv_temperature": 100.0,
"lapq_num_codes": 65536,
"lapq_num_quantizers": 4,
"lapq_quantize_dropout": false,
"lapq_quantize_dropout_cutoff_index": 0,
"lapq_quantize_dropout_multiple_of": 1,
"lapq_text_condition_heads": 2,
"lapq_text_condition_scale": 0.7,
"lapq_text_embed_dim": 256,
"lapq_text_input_dim": 2560,
"lapq_text_mlp_hidden_dim": 1024,
"latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"num_res_blocks": 2,
"out_channels": 3,
"patch_size": null,
"scale_factor_spatial": 8,
"scale_factor_temporal": 4,
"temperal_downsample": [
false,
true,
true
],
"use_lapq_quantizer": false,
"z_dim": 16
}