| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import omegaconf |
| |
|
| | from .pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer |
| | from .lazy_config_init import LazyCall as L |
| |
|
# Registry of tokenizer config factories, keyed by the string passed to
# ``tokenizer_register``.
TOKENIZER_OPTIONS = {}


def tokenizer_register(key):
    """Return a decorator that records a function in ``TOKENIZER_OPTIONS``.

    The decorated function is stored under ``key`` (replacing any entry
    already present for that key) and is handed back unchanged, so it stays
    directly callable under its own name.

    Args:
        key: Dictionary key under which the function is stored.

    Returns:
        A decorator taking the function to register and returning it as-is.
    """

    def _record(factory):
        TOKENIZER_OPTIONS[key] = factory
        return factory

    return _record
| |
|
| |
|
@tokenizer_register("cosmos_diffusion_tokenizer_comp8x8x8")
def get_cosmos_diffusion_tokenizer_comp8x8x8(resolution: str, chunk_duration: int) -> omegaconf.dictconfig.DictConfig:
    """Build the lazy config for the Cosmos diffusion tokenizer (8x8x8 compression).

    The result wraps ``JointImageVideoSharedJITTokenizer`` in a ``LazyCall``,
    with a ``VideoJITTokenizer`` config as ``video_vae`` and a ``JITVAE``
    config as ``image_vae``. All three share the same name and latent
    channel count.

    Args:
        resolution: Spatial resolution identifier; only ``"720"`` is accepted.
            Forwarded to the video tokenizer as ``spatial_resolution``.
        chunk_duration: Forwarded to the video tokenizer as
            ``pixel_chunk_duration``.

    Returns:
        The config object produced by calling
        ``L(JointImageVideoSharedJITTokenizer)`` (annotated as a ``DictConfig``).

    Raises:
        AssertionError: If ``resolution`` is not ``"720"``.
    """
    assert resolution in ["720"]

    # Values shared by the joint tokenizer and both of its sub-tokenizers.
    tokenizer_name = "cosmos_1_0_diffusion_tokenizer"
    latent_channels = 16
    # The same factor is applied along the temporal and the spatial axes.
    compression_factor = 8

    video_tokenizer = L(VideoJITTokenizer)(
        name=tokenizer_name,
        latent_ch=latent_channels,
        is_bf16=True,
        pixel_chunk_duration=chunk_duration,
        temporal_compression_factor=compression_factor,
        spatial_compression_factor=compression_factor,
        spatial_resolution=resolution,
    )
    image_tokenizer = L(JITVAE)(
        name=tokenizer_name,
        latent_ch=latent_channels,
        is_image=False,
        is_bf16=True,
    )

    return L(JointImageVideoSharedJITTokenizer)(
        video_vae=video_tokenizer,
        image_vae=image_tokenizer,
        name=tokenizer_name,
        latent_ch=latent_channels,
    )
| |
|