Buckets:

hf-doc-build/doc / diffusers /main /en /using-diffusers /inference_with_lcm_lora.html
rtrm's picture
download
raw
53.4 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Performing inference with LCM-LoRA&quot;,&quot;local&quot;:&quot;performing-inference-with-lcm-lora&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inference with a fine-tuned model&quot;,&quot;local&quot;:&quot;inference-with-a-fine-tuned-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Combine with styled LoRAs&quot;,&quot;local&quot;:&quot;combine-with-styled-loras&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ControlNet/T2I-Adapter&quot;,&quot;local&quot;:&quot;controlnett2i-adapter&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T2I-Adapter&quot;,&quot;local&quot;:&quot;t2i-adapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;AnimateDiff&quot;,&quot;local&quot;:&quot;animatediff&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/diffusers/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/start.dea43253.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/scheduler.182ea377.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/singletons.463df26e.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.1f6d62f6.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/paths.63e6068d.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/app.1d224620.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.abf12888.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/0.6c9ef567.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/176.da677ddd.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/Tip.230e2334.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/CodeBlock.57fe6e13.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/DocNotebookDropdown.5fa27ace.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/EditOnGithub.9b8e78e4.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Performing inference with LCM-LoRA&quot;,&quot;local&quot;:&quot;performing-inference-with-lcm-lora&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inference with a fine-tuned model&quot;,&quot;local&quot;:&quot;inference-with-a-fine-tuned-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Combine with styled LoRAs&quot;,&quot;local&quot;:&quot;combine-with-styled-loras&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ControlNet/T2I-Adapter&quot;,&quot;local&quot;:&quot;controlnett2i-adapter&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T2I-Adapter&quot;,&quot;local&quot;:&quot;t2i-adapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;AnimateDiff&quot;,&quot;local&quot;:&quot;animatediff&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In 
Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="performing-inference-with-lcm-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#performing-inference-with-lcm-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Performing inference with LCM-LoRA</span></h1> <p data-svelte-h="svelte-2fpkx0">Latent Consistency Models (LCM) enable quality image generation in typically 2-4 steps making it possible to use diffusion models in almost real-time settings.</p> <p data-svelte-h="svelte-d9zkw1">From the <a href="https://latent-consistency-models.github.io/" rel="nofollow">official website</a>:</p> <blockquote data-svelte-h="svelte-9ngsrn"><p>LCMs can be distilled from any pre-trained Stable Diffusion (SD) in only 4,000 training steps (~32 A100 GPU Hours) for generating high quality 768 x 768 resolution images in 2~4 steps or even one step, significantly accelerating text-to-image generation. 
We employ LCM to distill the Dreamshaper-V7 version of SD in just 4,000 training iterations.</p></blockquote> <p data-svelte-h="svelte-yzou0e">For a more technical overview of LCMs, refer to <a href="https://huggingface.co/papers/2310.04378" rel="nofollow">the paper</a>.</p> <p data-svelte-h="svelte-sqxks">However, each model needs to be distilled separately for latent consistency distillation. The core idea with LCM-LoRA is to train just a few adapter layers, the adapter being LoRA in this case.
This way, we don’t have to train the full model, and the number of trainable parameters stays manageable. The resulting LoRAs can then be applied to any fine-tuned version of the model without distilling them separately.
Additionally, the LoRAs can be applied to image-to-image, ControlNet/T2I-Adapter, inpainting, AnimateDiff, etc.
The LCM-LoRA can also be combined with other LoRAs to generate styled images in very few steps (4-8).</p> <p data-svelte-h="svelte-121irgb">LCM-LoRAs are available for <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow">stable-diffusion-v1-5</a>, <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0" rel="nofollow">stable-diffusion-xl-base-1.0</a>, and the <a href="https://huggingface.co/segmind/SSD-1B" rel="nofollow">SSD-1B</a> model. All the checkpoints can be found in this <a href="https://huggingface.co/collections/latent-consistency/latent-consistency-models-loras-654cdd24e111e16f0865fba6" rel="nofollow">collection</a>.</p> <p data-svelte-h="svelte-1op1l2i">For more details about LCM-LoRA, refer to <a href="https://huggingface.co/papers/2311.05556" rel="nofollow">the technical report</a>.</p> <p data-svelte-h="svelte-1pmm8o2">This guide shows how to perform inference with LCM-LoRAs for</p> <ul data-svelte-h="svelte-1utl29a"><li>text-to-image</li> <li>image-to-image</li> <li>combined with styled LoRAs</li> <li>ControlNet/T2I-Adapter</li> <li>inpainting</li> <li>AnimateDiff</li></ul> <p data-svelte-h="svelte-1l89nj1">Before going through this guide, we’ll take a look at the general workflow for performing inference with LCM-LoRAs.
LCM-LoRAs are similar to other Stable Diffusion LoRAs, so they can be used with any <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> that supports LoRAs.</p> <ul data-svelte-h="svelte-1or8ebm"><li>Load the task-specific pipeline and model.</li> <li>Set the scheduler to <a href="/docs/diffusers/main/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>.</li> <li>Load the LCM-LoRA weights for the model.</li> <li>Reduce the <code>guidance_scale</code> to a value in <code>[1.0, 2.0]</code> and set <code>num_inference_steps</code> to a value in <code>[4, 8]</code>.</li> <li>Perform inference with the pipeline with the usual parameters.</li></ul> <p data-svelte-h="svelte-19h4utp">Let’s look at how we can perform inference with LCM-LoRAs for different tasks.</p> <p data-svelte-h="svelte-qd8sxg">First, make sure you have <a href="https://github.com/huggingface/peft" rel="nofollow">peft</a> installed, for better LoRA support.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 
w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U peft<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h2> <p data-svelte-h="svelte-10c0x65">You’ll use the <a href="/docs/diffusers/main/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline">StableDiffusionXLPipeline</a> with the scheduler: <a href="/docs/diffusers/main/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a> and then load the LCM-LoRA. 
Together with the LCM-LoRA and the scheduler, the pipeline enables a fast inference workflow overcoming the slow iterative nature of diffusion models.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler
pipe = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
torch_dtype=torch.float16
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdxl&quot;</span>)
prompt = <span class="hljs-string">&quot;Self-portrait oil painting, a beautiful cyborg with golden hair, 8k&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">42</span>)
image = pipe(
prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">1.0</span>
).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zmq986"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2i.png"></p> <p data-svelte-h="svelte-n61tgz">Notice that we use only 4 steps for generation, which is far fewer than what’s typically used for standard SDXL.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-x2r697">You may have noticed that we set <code>guidance_scale=1.0</code>, which disables classifier-free guidance. This is because the LCM-LoRA is trained with guidance, so the batch size does not have to be doubled in this case. This leads to a faster inference time, with the drawback that negative prompts don’t have any effect on the denoising process.</p> <p data-svelte-h="svelte-1sl97s7">You can also use guidance with LCM-LoRA, but due to the nature of training, the model is very sensitive to the <code>guidance_scale</code> values; high values can lead to artifacts in the generated images. 
In our experiments, we found that the best values are in the range of [1.0, 2.0].</p></div> <h3 class="relative group"><a id="inference-with-a-fine-tuned-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-with-a-fine-tuned-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference with a fine-tuned model</span></h3> <p data-svelte-h="svelte-15rr98x">As mentioned above, the LCM-LoRA can be applied to any fine-tuned version of the model without having to distill them separately. Let’s look at how we can perform inference with a fine-tuned model. 
In this example, we’ll use the <a href="https://huggingface.co/Linaqruf/animagine-xl" rel="nofollow">animagine-xl</a> model, which is a fine-tuned version of the SDXL model for generating anime.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler
pipe = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;Linaqruf/animagine-xl&quot;</span>,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
torch_dtype=torch.float16
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdxl&quot;</span>)
prompt = <span class="hljs-string">&quot;face focus, cute, masterpiece, best quality, 1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, night, turtleneck&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">1.0</span>
).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19vr4c5"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2i_finetuned.png"></p> <h2 class="relative group"><a id="image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image-to-image</span></h2> <p data-svelte-h="svelte-2suklx">LCM-LoRA can be applied to image-to-image tasks too. Let’s look at how we can perform image-to-image generation with LCMs. 
For this example we’ll use the <a href="https://huggingface.co/Lykon/dreamshaper-7" rel="nofollow">dreamshaper-7</a> model and the LCM-LoRA for <code>stable-diffusion-v1-5 </code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForImage2Image, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid, load_image
pipe = AutoPipelineForImage2Image.from_pretrained(
<span class="hljs-string">&quot;Lykon/dreamshaper-7&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>)
<span class="hljs-comment"># prepare image</span>
url = <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png&quot;</span>
init_image = load_image(url)
prompt = <span class="hljs-string">&quot;Astronauts in a jungle, cold color palette, muted colors, detailed, 8k&quot;</span>
<span class="hljs-comment"># pass prompt and image to pipeline</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt,
image=init_image,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">1</span>,
strength=<span class="hljs-number">0.6</span>,
generator=generator
).images[<span class="hljs-number">0</span>]
make_image_grid([init_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zt9d3e"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_i2i.png"></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-18pswq">You can get different results based on your prompt and the image you provide. To get the best results, we recommend trying different values for <code>num_inference_steps</code>, <code>strength</code>, and <code>guidance_scale</code> parameters and choose the best one.</p></div> <h2 class="relative group"><a id="combine-with-styled-loras" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#combine-with-styled-loras"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Combine with styled LoRAs</span></h2> <p data-svelte-h="svelte-1ueo85e">LCM-LoRA can be combined with 
other LoRAs to generate styled images in very few steps (4-8). In the following example, we’ll use the LCM-LoRA with the <a href="https://huggingface.co/TheLastBen/Papercut_SDXL" rel="nofollow">papercut LoRA</a>.
To learn more about how to combine LoRAs, refer to <a href="https://huggingface.co/docs/diffusers/tutorials/using_peft_for_inference#combine-multiple-adapters" rel="nofollow">this guide</a>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler
pipe = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
torch_dtype=torch.float16
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LoRAs</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdxl&quot;</span>, adapter_name=<span class="hljs-string">&quot;lcm&quot;</span>)
pipe.load_lora_weights(<span class="hljs-string">&quot;TheLastBen/Papercut_SDXL&quot;</span>, weight_name=<span class="hljs-string">&quot;papercut.safetensors&quot;</span>, adapter_name=<span class="hljs-string">&quot;papercut&quot;</span>)
<span class="hljs-comment"># Combine LoRAs</span>
pipe.set_adapters([<span class="hljs-string">&quot;lcm&quot;</span>, <span class="hljs-string">&quot;papercut&quot;</span>], adapter_weights=[<span class="hljs-number">1.0</span>, <span class="hljs-number">0.8</span>])
prompt = <span class="hljs-string">&quot;papercut, a cute fox&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(prompt, num_inference_steps=<span class="hljs-number">4</span>, guidance_scale=<span class="hljs-number">1</span>, generator=generator).images[<span class="hljs-number">0</span>]
image<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9tzi4"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdx_lora_mix.png"></p> <h2 class="relative group"><a id="controlnett2i-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnett2i-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet/T2I-Adapter</span></h2> <p data-svelte-h="svelte-db61xh">Let’s look at how we can perform inference with ControlNet/T2I-Adapter and LCM-LoRA.</p> <h3 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 
28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h3> <p data-svelte-h="svelte-1cso8xy">For this example, we’ll use the SD-v1-5 model and the LCM-LoRA for SD-v1-5 with canny ControlNet.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> cv2
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
image = load_image(
<span class="hljs-string">&quot;https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png&quot;</span>
).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))
image = np.array(image)
low_threshold = <span class="hljs-number">100</span>
high_threshold = <span class="hljs-number">200</span>
image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, <span class="hljs-literal">None</span>]
image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>)
canny_image = Image.fromarray(image)
controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">&quot;lllyasviel/sd-controlnet-canny&quot;</span>, torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>,
controlnet=controlnet,
torch_dtype=torch.float16,
safety_checker=<span class="hljs-literal">None</span>,
variant=<span class="hljs-string">&quot;fp16&quot;</span>
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>)
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
<span class="hljs-string">&quot;the mona lisa&quot;</span>,
image=canny_image,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">1.5</span>,
controlnet_conditioning_scale=<span class="hljs-number">0.8</span>,
cross_attention_kwargs={<span class="hljs-string">&quot;scale&quot;</span>: <span class="hljs-number">1</span>},
generator=generator,
).images[<span class="hljs-number">0</span>]
make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11lx81y"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_controlnet.png"></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">The inference parameters in this example might not work for all examples, so we recommend trying different values for the <code>num_inference_steps</code>, <code>guidance_scale</code>, <code>controlnet_conditioning_scale</code>, and <code>cross_attention_kwargs</code> parameters and choosing the combination that works best.</div> <h3 class="relative group"><a id="t2i-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t2i-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T2I-Adapter</span></h3> <p data-svelte-h="svelte-5l31pr">This example shows how to use the LCM-LoRA with the <a href="https://huggingface.co/TencentARC/t2i-adapter-canny-sdxl-1.0">Canny 
T2I-Adapter</a> and SDXL.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> cv2
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLAdapterPipeline, T2IAdapter, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
<span class="hljs-comment"># Prepare image</span>
<span class="hljs-comment"># Detect the canny map in low resolution to avoid high-frequency details</span>
image = load_image(
<span class="hljs-string">&quot;https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_canny.jpg&quot;</span>
).resize((<span class="hljs-number">384</span>, <span class="hljs-number">384</span>))
image = np.array(image)
low_threshold = <span class="hljs-number">100</span>
high_threshold = <span class="hljs-number">200</span>
image = cv2.Canny(image, low_threshold, high_threshold)
image = image[:, :, <span class="hljs-literal">None</span>]
image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>)
canny_image = Image.fromarray(image).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">1024</span>))
<span class="hljs-comment"># load adapter</span>
adapter = T2IAdapter.from_pretrained(<span class="hljs-string">&quot;TencentARC/t2i-adapter-canny-sdxl-1.0&quot;</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">&quot;fp16&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
<span class="hljs-string">&quot;stabilityai/stable-diffusion-xl-base-1.0&quot;</span>,
adapter=adapter,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdxl&quot;</span>)
prompt = <span class="hljs-string">&quot;Mystical fairy in real, magic, 4k picture, high quality&quot;</span>
negative_prompt = <span class="hljs-string">&quot;extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
image=canny_image,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">1.5</span>,
adapter_conditioning_scale=<span class="hljs-number">0.8</span>,
adapter_conditioning_factor=<span class="hljs-number">1</span>,
generator=generator,
).images[<span class="hljs-number">0</span>]
make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-iuzcjf"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2iadapter.png"></p> <h2 class="relative group"><a id="inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpainting</span></h2> <p data-svelte-h="svelte-fhcmt1">LCM-LoRA can be used for inpainting as well.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
pipe = AutoPipelineForInpainting.from_pretrained(
<span class="hljs-string">&quot;runwayml/stable-diffusion-inpainting&quot;</span>,
torch_dtype=torch.float16,
variant=<span class="hljs-string">&quot;fp16&quot;</span>,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>)
<span class="hljs-comment"># load base and mask image</span>
init_image = load_image(<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png&quot;</span>)
mask_image = load_image(<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png&quot;</span>)
<span class="hljs-comment"># generator = torch.Generator(&quot;cuda&quot;).manual_seed(92)</span>
prompt = <span class="hljs-string">&quot;concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(
prompt=prompt,
image=init_image,
mask_image=mask_image,
generator=generator,
num_inference_steps=<span class="hljs-number">4</span>,
guidance_scale=<span class="hljs-number">4</span>,
).images[<span class="hljs-number">0</span>]
make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t65w1d"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_inpainting.png"></p> <h2 class="relative group"><a id="animatediff" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#animatediff"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AnimateDiff</span></h2> <p data-svelte-h="svelte-13othel"><code>AnimateDiff</code> allows you to animate images using Stable Diffusion models. To get good results, we need to generate multiple frames (16-24), and doing this with standard SD models can be very slow.
LCM-LoRA can be used to speed up the process significantly, as you just need to do 4-8 steps for each frame. Let’s look at how we can perform animation with LCM-LoRA and AnimateDiff.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> MotionAdapter, AnimateDiffPipeline, DDIMScheduler, LCMScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_gif
adapter = MotionAdapter.from_pretrained(<span class="hljs-string">&quot;diffusers/animatediff-motion-adapter-v1-5&quot;</span>)
pipe = AnimateDiffPipeline.from_pretrained(
<span class="hljs-string">&quot;frankjoshua/toonyou_beta6&quot;</span>,
motion_adapter=adapter,
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># set scheduler</span>
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># load LCM-LoRA</span>
pipe.load_lora_weights(<span class="hljs-string">&quot;latent-consistency/lcm-lora-sdv1-5&quot;</span>, adapter_name=<span class="hljs-string">&quot;lcm&quot;</span>)
pipe.load_lora_weights(<span class="hljs-string">&quot;guoyww/animatediff-motion-lora-zoom-in&quot;</span>, weight_name=<span class="hljs-string">&quot;diffusion_pytorch_model.safetensors&quot;</span>, adapter_name=<span class="hljs-string">&quot;motion-lora&quot;</span>)
pipe.set_adapters([<span class="hljs-string">&quot;lcm&quot;</span>, <span class="hljs-string">&quot;motion-lora&quot;</span>], adapter_weights=[<span class="hljs-number">0.55</span>, <span class="hljs-number">1.2</span>])
prompt = <span class="hljs-string">&quot;best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress&quot;</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
frames = pipe(
prompt=prompt,
num_inference_steps=<span class="hljs-number">5</span>,
guidance_scale=<span class="hljs-number">1.25</span>,
cross_attention_kwargs={<span class="hljs-string">&quot;scale&quot;</span>: <span class="hljs-number">1</span>},
num_frames=<span class="hljs-number">24</span>,
generator=generator
).frames[<span class="hljs-number">0</span>]
export_to_gif(frames, <span class="hljs-string">&quot;animation.gif&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zceinn"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_animatediff.gif"></p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/inference_with_lcm_lora.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client bootstrap for this page: publish the path configuration, capture the
// mount element, then load the two entry modules and start the app.
//
// Plain assignment to an undeclared name (no var/let/const), so the object is
// not scoped to this block. Both `assets` and `base` point at the docs root;
// `env` is empty. Presumably consumed by the imported runtime; confirm there.
__sveltekit_1j8f0t4 = {
assets: "/docs/diffusers/main/en",
base: "/docs/diffusers/main/en",
env: {}
};
// Parent of the script element that contains this code; passed to kit.start()
// as the second argument. Read here, before any asynchronous work, because
// document.currentScript refers to this script only while it is executing.
const element = document.currentScript.parentElement;
// Two null placeholders, passed through unchanged as the `data` option below.
const data = [null,null];
// Fetch both entry modules in parallel; nothing starts until both have loaded.
Promise.all([
import("/docs/diffusers/main/en/_app/immutable/entry/start.dea43253.js"),
import("/docs/diffusers/main/en/_app/immutable/entry/app.1d224620.js")
]).then(([kit, app]) => {
// Start the app with the loaded app module, the captured element and the
// page state. NOTE(review): node_ids looks like the route-node indices for
// this page (layout 0, page 176); verify against the build manifest.
kit.start(app, element, {
node_ids: [0, 176],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
53.4 kB
·
Xet hash:
ebb39e2c7c0ab7af36297846f7c3aba2db7433d4e4ef0ef371dbeaa69c28d309

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.