| | --- |
| | license: apache-2.0 |
| | base_model: runwayml/stable-diffusion-v1-5 |
| | tags: |
| | - text-to-image |
| | - diffusion-models |
| | - stable-diffusion |
| | - diffusers |
| | - image-generation |
| | - fast-sampling |
| | library_name: diffusers |
| | pipeline_tag: text-to-image |
| | --- |
| | # Image Diffusion Preview with Consistency Solver (Google DeepMind) |
| |
|
| | [paper](https://arxiv.org/abs/2512.13592) [code](https://github.com/G-U-N/consolver) [huggingface](https://huggingface.co/papers/2512.13592) [model](https://huggingface.co/wangfuyun/consolver) |
| |
|
| | # Quick Start |
| |
|
| | ```python |
| | import torch |
| | from diffusers import StableDiffusionPipeline, DDIMScheduler |
| | from scheduler_ppo import PPOScheduler # Provided in this repo |
| | from huggingface_hub import hf_hub_download |
| | |
| | # Download the trained factor_net checkpoint |
| | factor_net_path = hf_hub_download( |
| | repo_id="wangfuyun/consolver", |
| | filename="model.ckpt" |
| | ) |
| | |
| | model_id = "runwayml/stable-diffusion-v1-5" |
| | prompt = "an astronaut riding a horse on the moon, highly detailed, 8k" |
| | num_inference_steps = 8 |
| | guidance_scale = 3.0 |
| | seed = 43 |
| | height = width = 512 |
| | |
| | def load_pipeline(scheduler_type="ddim"): |
| | if scheduler_type == "ppo": |
| | scheduler = PPOScheduler( |
| | beta_end=0.012, |
| | beta_schedule="scaled_linear", |
| | beta_start=0.00085, |
| | num_train_timesteps=1000, |
| | steps_offset=1, |
| | timestep_spacing="trailing", |
| | order_dim=4, |
| | scaler_dim=0, |
| | use_conv=False, |
| | factor_net_kwargs=dict(embedding_dim=64, hidden_dim=256, num_actions=11), |
| | ) |
| | else: |
| | scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler", timestep_spacing="trailing") |
| | |
| | pipe = StableDiffusionPipeline.from_pretrained( |
| | model_id, |
| | scheduler=scheduler, |
| | safety_checker=None, |
| | # torch_dtype=torch.float16, # Uncomment for GPU memory savings |
| | ).to("cuda") |
| | |
| | if scheduler_type == "ppo" and factor_net_path: |
| | weight = torch.load(factor_net_path, map_location="cpu") |
| | pipe.scheduler.factor_net.load_state_dict(weight) |
| | pipe.scheduler.factor_net.to("cuda") |
| | |
| | return pipe |
| | |
| | generator = torch.Generator("cuda").manual_seed(seed) |
| | |
| | # DDIM baseline (8 steps) |
| | pipe_ddim = load_pipeline("ddim") |
| | image_ddim = pipe_ddim(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, |
| | generator=generator, height=height, width=width).images[0] |
| | image_ddim.save("ddim_result.jpg") |
| | |
| | # ConSolver (8 steps) |
| | pipe_consolver = load_pipeline("ppo") |
| | image_consolver = pipe_consolver(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, |
| | generator=generator, height=height, width=width).images[0] |
| | image_consolver.save("consolver_result.jpg") |
| | ``` |
| |
|
| |
|
| |
|
| |
|
| | <div align="center"> |
| | <table> |
| | <tr> |
| | <td align="center"> |
| | <img src="https://github.com/user-attachments/assets/35f5f99a-ca5f-4919-82cf-04a67a2dbe13" alt="DDIM" width="80%" /> |
| | </td> |
| | <td align="center"> |
| | <img src="https://github.com/user-attachments/assets/6428a663-b488-4ecc-b79c-4fcb431d5630" alt="Consistency Solver" width="80%" /> |
| | </td> |
| | </tr> |
| | <tr> |
| | <td align="center"> |
| | <em>DDIM</em> |
| | </td> |
| | <td align="center"> |
| | <em>ConsistencySolver</em> |
| | </td> |
| | </tr> |
| | </table> |
| | </div> |
| | |