Instructions to use tiny-random/z-image with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use tiny-random/z-image with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("tiny-random/z-image", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Draw Things
- DiffusionBee
| library_name: diffusers | |
| pipeline_tag: text-to-image | |
| inference: true | |
| base_model: | |
| - Tongyi-MAI/Z-Image-Turbo | |
| This tiny model is for debugging. It is randomly initialized with the config adapted from [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo). | |
| File size: | |
| - 2.4MB text_encoder/model.safetensors | |
| - 1.4MB transformer/diffusion_pytorch_model.safetensors | |
| - 0.5MB vae/diffusion_pytorch_model.safetensors | |
### Example usage:
```python
# Minimal end-to-end smoke test: load the tiny pipeline and render one image.
import torch
from diffusers import ZImagePipeline

model_id = "tiny-random/z-image"
torch_dtype = torch.bfloat16
# Single place to choose the target device; both the pipeline and the
# random generator below follow this value.
device = "cuda"

pipe = ZImagePipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)

prompt = "Flowers and trees"
image = pipe(
    prompt=prompt,
    height=1024,
    width=1024,
    num_inference_steps=9,  # This actually results in 8 DiT forwards
    guidance_scale=0.0,  # Guidance should be 0 for the Turbo models
    # Seed the generator on `device` instead of a hard-coded "cuda" so the
    # example keeps working when `device` is changed above.
    generator=torch.Generator(device).manual_seed(42),
).images[0]
print(image)
```
### Code to create this repo:
```python
import json

import torch
from diffusers import (
    AutoencoderKL,
    DiffusionPipeline,
    FlowMatchEulerDiscreteScheduler,
    ZImagePipeline,
    ZImageTransformer2DModel,
)
from huggingface_hub import hf_hub_download
from transformers import (
    AutoConfig,
    AutoTokenizer,
    Qwen2Tokenizer,
    Qwen3Model,
)
from transformers.generation import GenerationConfig

# Upstream repository whose configs are adapted, and the local output folder.
source_model_id = "Tongyi-MAI/Z-Image-Turbo"
save_folder = "/tmp/tiny-random/z-image"

# Make bfloat16 the default floating-point dtype for everything built below.
torch.set_default_dtype(torch.bfloat16)

# The scheduler and tokenizer are loaded as-is from the source repository.
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    source_model_id,
    subfolder='scheduler',
)
tokenizer = AutoTokenizer.from_pretrained(
    source_model_id,
    subfolder='tokenizer',
)
def save_json(path, obj):
    """Write *obj* to *path* as indented UTF-8 JSON.

    Missing parent directories of *path* are created first. Non-ASCII
    characters are written verbatim rather than escaped.
    """
    import json
    from pathlib import Path

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(obj, indent=2, ensure_ascii=False)
    with target.open('w', encoding='utf-8') as handle:
        handle.write(serialized)
def init_weights(model):
    """Re-initialise every parameter of *model* in place with seeded noise.

    The global torch RNG is seeded with 42, then each parameter (visited in
    sorted ``named_parameters()`` order) is filled from a normal distribution
    with mean 0 and std 0.1. Name, shape, dtype and device of each parameter
    are printed as it is filled.
    """
    import torch
    from torch.nn import init

    torch.manual_seed(42)
    ordered_params = sorted(model.named_parameters())
    with torch.no_grad():
        for param_name, param in ordered_params:
            init.normal_(param, mean=0, std=0.1)
            print(param_name, param.shape, param.dtype, param.device)
# --- Text encoder ---
# Fetch the upstream text-encoder config, override its size-related fields
# with tiny values, and save the result under the output folder.
with open(
    hf_hub_download(source_model_id, filename='text_encoder/config.json', repo_type='model'),
    'r',
    encoding='utf-8',  # canonical codec name, same as the other config reads
) as f:
    config = json.load(f)
config.update({
    "head_dim": 32,
    'hidden_size': 8,
    'intermediate_size': 32,
    'max_window_layers': 1,
    'num_attention_heads': 8,
    'num_hidden_layers': 2,
    'num_key_value_heads': 4,
    'tie_word_embeddings': True,
})
save_json(f'{save_folder}/text_encoder/config.json', config)

# Build the model from the config file that was just written.
text_encoder_config = AutoConfig.from_pretrained(
    f'{save_folder}/text_encoder')
text_encoder = Qwen3Model(text_encoder_config).to(torch.bfloat16)

# Attach the generation config loaded from the source repository.
generation_config = GenerationConfig.from_pretrained(
    source_model_id, subfolder='text_encoder')
text_encoder.generation_config = generation_config

# Overwrite the default initialisation with seeded normal noise.
init_weights(text_encoder)
# --- Diffusion transformer ---
# Start from the upstream transformer config and overlay the tiny overrides;
# keys already present keep their position, new keys are appended.
with open(
    hf_hub_download(source_model_id, filename='transformer/config.json', repo_type='model'),
    'r',
    encoding='utf-8',
) as f:
    config = {
        **json.load(f),
        'dim': 64,
        'axes_dims': [8, 8, 16],
        # NOTE(review): n_kv_heads (4) is larger than n_heads (2) — confirm
        # this is intended before reusing these values elsewhere.
        'n_heads': 2,
        'n_kv_heads': 4,
        'n_layers': 2,
        'cap_feat_dim': 8,
        'in_channels': 8,
    }
save_json(f'{save_folder}/transformer/config.json', config)

# Instantiate the transformer from the saved config, then re-seed its weights.
transformer_config = ZImageTransformer2DModel.load_config(f'{save_folder}/transformer')
transformer = ZImageTransformer2DModel.from_config(transformer_config)
init_weights(transformer)
# --- VAE ---
# Start from the upstream VAE config and overlay the tiny overrides.
with open(
    hf_hub_download(source_model_id, filename='vae/config.json', repo_type='model'),
    'r',
    encoding='utf-8',
) as f:
    config = {
        **json.load(f),
        'layers_per_block': 1,
        'block_out_channels': [32, 32],
        'latent_channels': 8,
        'down_block_types': ['DownEncoderBlock2D', 'DownEncoderBlock2D'],
        'up_block_types': ['UpDecoderBlock2D', 'UpDecoderBlock2D'],
    }
save_json(f'{save_folder}/vae/config.json', config)

# Instantiate the autoencoder from the saved config, then re-seed its weights.
vae_config = AutoencoderKL.load_config(f'{save_folder}/vae')
vae = AutoencoderKL.from_config(vae_config)
init_weights(vae)
# --- Pipeline ---
# Combine the components built above, cast the pipeline to bfloat16, and
# write it to the output folder in safetensors format.
pipeline = ZImagePipeline(
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    transformer=transformer,
    vae=vae,
).to(torch.bfloat16)
pipeline.save_pretrained(save_folder, safe_serialization=True)
print(pipeline)
| ``` |