Buckets:

download
raw
52.5 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Latent Consistency Model&quot;,&quot;local&quot;:&quot;latent-consistency-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Adapters&quot;,&quot;local&quot;:&quot;adapters&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LoRA&quot;,&quot;local&quot;:&quot;lora&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T2I-Adapter&quot;,&quot;local&quot;:&quot;t2i-adapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;AnimateDiff&quot;,&quot;local&quot;:&quot;animatediff&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/diffusers/pr_12411/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/entry/start.3e0bd2b6.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/scheduler.53228c21.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/singletons.65de65e1.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/index.e93d0901.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/paths.d8aaea9f.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/entry/app.075ca386.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/preload-helper.a3bbca49.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/index.100fac89.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/nodes/0.64b9a56d.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/nodes/336.e16bce6a.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/CopyLLMTxtMenu.c51e21f1.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/IconCopy.38cf8f56.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.5af3dadc.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/CodeBlock.d30a6509.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/DocNotebookDropdown.74a16910.js">
<link rel="modulepreload" href="/docs/diffusers/pr_12411/en/_app/immutable/chunks/HfOption.fad27e59.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Latent Consistency Model&quot;,&quot;local&quot;:&quot;latent-consistency-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Adapters&quot;,&quot;local&quot;:&quot;adapters&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LoRA&quot;,&quot;local&quot;:&quot;lora&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T2I-Adapter&quot;,&quot;local&quot;:&quot;t2i-adapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;AnimateDiff&quot;,&quot;local&quot;:&quot;animatediff&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="latent-consistency-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#latent-consistency-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Latent Consistency Model</span></h1> <p data-svelte-h="svelte-fadb22"><a href="https://hf.co/papers/2310.04378" rel="nofollow">Latent Consistency Models (LCMs)</a> enable fast high-quality image generation by directly predicting the reverse diffusion process in the latent rather than pixel space. In other words, LCMs try to predict the noiseless image from the noisy image in contrast to typical diffusion models that iteratively remove noise from the noisy image. By avoiding the iterative sampling process, LCMs are able to generate high-quality images in 2-4 steps instead of 20-30 steps.</p> <p data-svelte-h="svelte-1nhlbs8">LCMs are distilled from pretrained models which requires ~32 hours of A100 compute. To speed this up, <a href="https://hf.co/papers/2311.05556" rel="nofollow">LCM-LoRAs</a> train a <a href="https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora" rel="nofollow">LoRA adapter</a> which have much fewer parameters to train compared to the full model. The LCM-LoRA can be plugged into a diffusion model once it has been trained.</p> <p data-svelte-h="svelte-vcrp3b">This guide will show you how to use LCMs and LCM-LoRAs for fast inference on tasks and how to use them with other adapters like ControlNet or T2I-Adapter.</p> <blockquote class="tip" data-svelte-h="svelte-k4lcym"><p>LCMs and LCM-LoRAs are available for Stable Diffusion v1.5, Stable Diffusion XL, and the SSD-1B model. You can find their checkpoints on the <a href="https://hf.co/collections/latent-consistency/latent-consistency-models-weights-654ce61a95edd6dffccef6a8" rel="nofollow">Latent Consistency</a> Collections.</p></blockquote> <h2 class="relative group"><a id="text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h2> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LCM </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">LCM-LoRA </div></div> <div class="language-select"><p data-svelte-h="svelte-1svjiy">To use LCMs, you need to load the LCM checkpoint for your supported model into <a href="/docs/diffusers/pr_12411/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> and replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>. Then you can use the pipeline as usual, and pass a text prompt to generate an image in just 4 steps.</p> <p data-svelte-h="svelte-185mgq9">A couple of notes to keep in mind when using LCMs are:</p> <ul data-svelte-h="svelte-1mvqiqn"><li>Typically, batch size is doubled inside the pipeline for classifier-free guidance. But LCM applies guidance with guidance embeddings and doesn’t need to double the batch size, which leads to faster inference. The downside is that negative prompts don’t work with LCM because they don’t have any effect on the denoising process.</li> <li>The ideal range for <code>guidance_scale</code> is [3., 13.] because that is what the UNet was trained with. However, disabling <code>guidance_scale</code> with a value of 1.0 is also effective in most cases.</li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline, UNet2DConditionModel, LCMScheduler
<span class="hljs-keyword">import</span> torch
unet = UNet2DConditionModel.from_pretrained(
<span class="hljs-string">&quot;latent-consistency/lcm-sdxl&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
)
pipe = StableDiffusionXLPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>, unet=unet, torch_dtype=torch.float16, variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
prompt = <span class="hljs-string">&quot;Self-portrait oil painting, a beautiful cyborg with golden hair, 8k&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">8.0</span>
).images[<span class="hljs-number">0</span>]
image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-r54e1a"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdxl_t2i.png"></div> </div> <h2 class="relative group"><a id="image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image-to-image</span></h2> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LCM </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">LCM-LoRA </div></div> <div class="language-select"><p data-svelte-h="svelte-18wux7g">To use LCMs for image-to-image, you need to load the LCM checkpoint for your supported model into <a href="/docs/diffusers/pr_12411/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> and replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>. Then you can use the pipeline as usual, and pass a text prompt and initial image to generate an image in just 4 steps.</p> <blockquote class="tip" data-svelte-h="svelte-no4hyc"><p>Experiment with different values for <code>num_inference_steps</code>, <code>strength</code>, and <code>guidance_scale</code> to get the best results.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForImage2Image, UNet2DConditionModel, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image
unet = UNet2DConditionModel.from_pretrained(
<span class="hljs-string">&quot;SimianLuo/LCM_Dreamshaper_v7&quot;</span>,
subfolder=<span class="hljs-string">&quot;unet&quot;</span>,
torch_dtype=torch.float16,
)
pipe = AutoPipelineForImage2Image.from_pretrained(
<span class="hljs-string">&quot;Lykon/dreamshaper-7&quot;</span>,
unet=unet,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
init_image = load_image(<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png&quot;</span>)
prompt = <span class="hljs-string">&quot;Astronauts in a jungle, cold color palette, muted colors, detailed, 8k&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt,
image=init_image,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">7.5</span>,
strength=<span class="hljs-number">0.5</span>,
generator=generator
).images[<span class="hljs-number">0</span>]
image<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-f0uu5w"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-img2img.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption></div></div> </div> <h2 class="relative group"><a id="inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpainting</span></h2> <p data-svelte-h="svelte-sj2mbj">To use LCM-LoRAs for inpainting, you need to replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a> and load the LCM-LoRA weights with the <a href="/docs/diffusers/pr_12411/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> method. Then you can use the pipeline as usual, and pass a text prompt, initial image, and mask image to generate an image in just 4 steps.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
pipe = AutoPipelineForInpainting.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-inpainting&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>)
init_image = load_image(<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png&quot;</span>)
mask_image = load_image(<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png&quot;</span>)
prompt = <span class="hljs-string">&quot;concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt,
image=init_image,
mask_image=mask_image,
generator=generator,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">4</span>,
).images[<span class="hljs-number">0</span>]
image<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-1olacag"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption></div></div> <h2 class="relative group"><a id="adapters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#adapters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Adapters</span></h2> <p data-svelte-h="svelte-i3adm8">LCMs are compatible with adapters like LoRA, ControlNet, T2I-Adapter, and AnimateDiff. You can bring the speed of LCMs to these adapters to generate images in a certain style or condition the model on another input like a canny image.</p> <h3 class="relative group"><a id="lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA</span></h3> <p data-svelte-h="svelte-2ivo4o"><a href="../tutorials/using_peft_for_inference">LoRA</a> adapters can be rapidly finetuned to learn a new style from just a few images and plugged into a pretrained model to generate images in that style.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LCM </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">LCM-LoRA </div></div> <div class="language-select"><p data-svelte-h="svelte-1aaympu">Load the LCM checkpoint for your supported model into <a href="/docs/diffusers/pr_12411/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> and replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>. Then you can use the <a href="/docs/diffusers/pr_12411/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> method to load the LoRA weights into the LCM and generate a styled image in a few steps.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline, UNet2DConditionModel, LCMScheduler
<span class="hljs-keyword">import</span> torch
unet = UNet2DConditionModel.from_pretrained(
<span class="hljs-string">&quot;latent-consistency/lcm-sdxl&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
)
pipe = StableDiffusionXLPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>, unet=unet, torch_dtype=torch.float16, variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights(<span class="hljs-string">&quot;TheLastBen/Papercut_SDXL&quot;</span>, weight_name=<span class="hljs-string">&quot;papercut.safetensors&quot;</span>, adapter_name=<span class="hljs-string">&quot;papercut&quot;</span>)
prompt = <span class="hljs-string">&quot;papercut, a cute fox&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">8.0</span>
).images[<span class="hljs-number">0</span>]
image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1205m44"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdx_lora_mix.png"></div> </div> <h3 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h3> <p data-svelte-h="svelte-1fviti6"><a href="./controlnet">ControlNet</a> are adapters that can be trained on a variety of inputs like canny edge, pose estimation, or depth. The ControlNet can be inserted into the pipeline to provide additional conditioning and control to the model for more accurate generation.</p> <p data-svelte-h="svelte-1wwp3s2">You can find additional ControlNet models trained on other inputs in <a href="https://hf.co/lllyasviel" rel="nofollow">lllyasviel’s</a> repository.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LCM </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">LCM-LoRA </div></div> <div class="language-select"><p data-svelte-h="svelte-a348xk">Load a ControlNet model trained on canny images and pass it to the <a href="/docs/diffusers/pr_12411/en/api/models/controlnet#diffusers.ControlNetModel">ControlNetModel</a>. Then you can load a LCM model into <a href="/docs/diffusers/pr_12411/en/api/pipelines/controlnet#diffusers.StableDiffusionControlNetPipeline">StableDiffusionControlNetPipeline</a> and replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>. Now pass the canny image to the pipeline and generate an image.</p> <blockquote class="tip" data-svelte-h="svelte-1os48ku"><p>Experiment with different values for <code>num_inference_steps</code>, <code>controlnet_conditioning_scale</code>, <code>cross_attention_kwargs</code>, and <code>guidance_scale</code> to get the best results.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> cv2
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
image = load_image(
<span class="hljs-string">&quot;https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png&quot;</span>
).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))
image = np.array(image)
low_threshold = <span class="hljs-number">100</span>
high_threshold = <span class="hljs-number">200</span>
image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, <span class="hljs-literal">None</span>]
image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>)
canny_image = Image.fromarray(image)
controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">&quot;lllyasviel/sd-controlnet-canny&quot;</span>, torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
<span class="hljs-string">&quot;SimianLuo/LCM_Dreamshaper_v7&quot;</span>,
controlnet=controlnet,
torch_dtype=torch.float16,
safety_checker=<span class="hljs-literal">None</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
<span class="hljs-string">&quot;the mona lisa&quot;</span>,
image=canny_image,
num_inference_steps=<span class="hljs-number">4</span>,
generator=generator,
).images[<span class="hljs-number">0</span>]
make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1f4gba0"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_full_sdv1-5_controlnet.png"></div> </div> <h3 class="relative group"><a id="t2i-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t2i-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T2I-Adapter</span></h3> <p data-svelte-h="svelte-tl9wrs"><a href="./t2i_adapter">T2I-Adapter</a> is an even more lightweight adapter than ControlNet, that provides an additional input to condition a pretrained model with. It is faster than ControlNet but the results may be slightly worse.</p> <p data-svelte-h="svelte-18ngb4o">You can find additional T2I-Adapter checkpoints trained on other inputs in <a href="https://hf.co/TencentARC" rel="nofollow">TencentArc’s</a> repository.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LCM </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">LCM-LoRA </div></div> <div class="language-select"><p data-svelte-h="svelte-pc4xtv">Load a T2IAdapter trained on canny images and pass it to the <a href="/docs/diffusers/pr_12411/en/api/pipelines/stable_diffusion/adapter#diffusers.StableDiffusionXLAdapterPipeline">StableDiffusionXLAdapterPipeline</a>. Then load a LCM checkpoint into <a href="/docs/diffusers/pr_12411/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> and replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>. Now pass the canny image to the pipeline and generate an image.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> cv2
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLAdapterPipeline, UNet2DConditionModel, T2IAdapter, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
<span class="hljs-comment"># detect the canny map in low resolution to avoid high-frequency details</span>
image = load_image(
<span class="hljs-string">&quot;https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png&quot;</span>
).resize((<span class="hljs-number">384</span>, <span class="hljs-number">384</span>))
image = np.array(image)
low_threshold = <span class="hljs-number">100</span>
high_threshold = <span class="hljs-number">200</span>
image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, <span class="hljs-literal">None</span>]
image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>)
canny_image = Image.fromarray(image).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">1216</span>))
adapter = T2IAdapter.from_pretrained(<span class="hljs-string">&quot;TencentARC/t2i-adapter-canny-sdxl-1.0&quot;</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">&quot;fp16&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
unet = UNet2DConditionModel.from_pretrained(
<span class="hljs-string">&quot;latent-consistency/lcm-sdxl&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
)
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>,
unet=unet,
adapter=adapter,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
prompt = <span class="hljs-string">&quot;the mona lisa, 4k picture, high quality&quot;</span>
negative_prompt = <span class="hljs-string">&quot;extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
image=canny_image,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">5</span>,
adapter_conditioning_scale=<span class="hljs-number">0.8</span>,
adapter_conditioning_factor=<span class="hljs-number">1</span>,
generator=generator,
).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1m6j6i7"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-t2i.png"></div> </div> <h3 class="relative group"><a id="animatediff" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#animatediff"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AnimateDiff</span></h3> <p data-svelte-h="svelte-14vivd7"><a href="../api/pipelines/animatediff">AnimateDiff</a> is an adapter that adds motion to an image. It can be used with most Stable Diffusion models, effectively turning them into “video generation” models. Generating good results with a video model usually requires generating multiple frames (16-24), which can be very slow with a regular Stable Diffusion model. LCM-LoRA can speed up this process by only taking 4-8 steps for each frame.</p> <p data-svelte-h="svelte-cnwg3s">Load a <a href="/docs/diffusers/pr_12411/en/api/pipelines/animatediff#diffusers.AnimateDiffPipeline">AnimateDiffPipeline</a> and pass a <code>MotionAdapter</code> to it. Then replace the scheduler with the <a href="/docs/diffusers/pr_12411/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>, and combine both LoRA adapters with the <code>~loaders.UNet2DConditionLoadersMixin.set_adapters</code> method. Now you can pass a prompt to the pipeline and generate an animated image.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> MotionAdapter, AnimateDiffPipeline, DDIMScheduler, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_gif
adapter = MotionAdapter.from_pretrained(<span class="hljs-string">&quot;guoyww/animatediff-motion-adapter-v1-5&quot;</span>)
pipe = AnimateDiffPipeline.from_pretrained(
<span class="hljs-string">&quot;frankjoshua/toonyou_beta6&quot;</span>,
motion_adapter=adapter,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>, adapter_name=<span class="hljs-string">&quot;lcm&quot;</span>)
pipe.load_lora_weights(<span class="hljs-string">&quot;guoyww/animatediff-motion-lora-zoom-in&quot;</span>, weight_name=<span class="hljs-string">&quot;diffusion_pytorch_model.safetensors&quot;</span>, adapter_name=<span class="hljs-string">&quot;motion-lora&quot;</span>)
pipe.set_adapters([<span class="hljs-string">&quot;lcm&quot;</span>, <span class="hljs-string">&quot;motion-lora&quot;</span>], adapter_weights=[<span class="hljs-number">0.55</span>, <span class="hljs-number">1.2</span>])
prompt = <span class="hljs-string">&quot;best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
frames = pipe(
prompt=prompt,
num_inference_steps=<span class="hljs-number">5</span>,
guidance_scale=<span class="hljs-number">1.25</span>,
cross_attention_kwargs={<span class="hljs-string">&quot;scale&quot;</span>: <span class="hljs-number">1</span>},
num_frames=<span class="hljs-number">24</span>,
generator=generator
).frames[<span class="hljs-number">0</span>]
export_to_gif(frames, <span class="hljs-string">&quot;animation.gif&quot;</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-8kha3o"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm-lora-animatediff.gif"></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/inference_with_lcm.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_r19zh6 = {
assets: "/docs/diffusers/pr_12411/en",
base: "/docs/diffusers/pr_12411/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/diffusers/pr_12411/en/_app/immutable/entry/start.3e0bd2b6.js"),
import("/docs/diffusers/pr_12411/en/_app/immutable/entry/app.075ca386.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 336],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
52.5 kB
·
Xet hash:
e377a48366a5bf891c2643ed655bb7915e48e61c2a7726689eef3925dbfc9f18

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.