Buckets:
| <meta charset="utf-8"><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Performing inference with LCM-LoRA&quot;,&quot;local&quot;:&quot;performing-inference-with-lcm-lora&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inference with a fine-tuned model&quot;,&quot;local&quot;:&quot;inference-with-a-fine-tuned-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Combine with styled LoRAs&quot;,&quot;local&quot;:&quot;combine-with-styled-loras&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ControlNet/T2I-Adapter&quot;,&quot;local&quot;:&quot;controlnett2i-adapter&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T2I-Adapter&quot;,&quot;local&quot;:&quot;t2i-adapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;AnimateDiff&quot;,&quot;local&quot;:&quot;animatediff&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |
| <link href="/docs/diffusers/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/start.dea43253.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/scheduler.182ea377.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/singletons.463df26e.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.1f6d62f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/paths.63e6068d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/app.1d224620.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.abf12888.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/0.6c9ef567.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/176.da677ddd.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/Tip.230e2334.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/CodeBlock.57fe6e13.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/DocNotebookDropdown.5fa27ace.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/EditOnGithub.9b8e78e4.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Performing inference with LCM-LoRA","local":"performing-inference-with-lcm-lora","sections":[{"title":"Text-to-image","local":"text-to-image","sections":[{"title":"Inference with a fine-tuned model","local":"inference-with-a-fine-tuned-model","sections":[],"depth":3}],"depth":2},{"title":"Image-to-image","local":"image-to-image","sections":[],"depth":2},{"title":"Combine with styled LoRAs","local":"combine-with-styled-loras","sections":[],"depth":2},{"title":"ControlNet/T2I-Adapter","local":"controlnett2i-adapter","sections":[{"title":"ControlNet","local":"controlnet","sections":[],"depth":3},{"title":"T2I-Adapter","local":"t2i-adapter","sections":[],"depth":3}],"depth":2},{"title":"Inpainting","local":"inpainting","sections":[],"depth":2},{"title":"AnimateDiff","local":"animatediff","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="performing-inference-with-lcm-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#performing-inference-with-lcm-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 
256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Performing inference with LCM-LoRA</span></h1> <p data-svelte-h="svelte-2fpkx0">Latent Consistency Models (LCM) enable quality image generation in typically 2-4 steps making it possible to use diffusion models in almost real-time settings.</p> <p data-svelte-h="svelte-d9zkw1">From the <a href="https://latent-consistency-models.github.io/" rel="nofollow">official website</a>:</p> <blockquote data-svelte-h="svelte-9ngsrn"><p>LCMs can be distilled from any pre-trained Stable Diffusion (SD) in only 4,000 training steps (~32 A100 GPU Hours) for generating high quality 768 x 768 resolution images in 2~4 steps or even one step, significantly accelerating text-to-image generation. We employ LCM to distill the Dreamshaper-V7 version of SD in just 4,000 training iterations.</p></blockquote> <p data-svelte-h="svelte-yzou0e">For a more technical overview of LCMs, refer to <a href="https://huggingface.co/papers/2310.04378" rel="nofollow">the paper</a>.</p> <p data-svelte-h="svelte-sqxks">However, each model needs to be distilled separately for latent consistency distillation. The core idea with LCM-LoRA is to train just a few adapter layers, the adapter being LoRA in this case. | |
| This way, we don’t have to train the full model and keep the number of trainable parameters manageable. The resulting LoRAs can then be applied to any fine-tuned version of the model without distilling them separately. | |
| Additionally, the LoRAs can be applied to image-to-image, ControlNet/T2I-Adapter, inpainting, AnimateDiff etc. | |
| The LCM-LoRA can also be combined with other LoRAs to generate styled images in very few steps (4-8).</p> <p data-svelte-h="svelte-121irgb">LCM-LoRAs are available for <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow">stable-diffusion-v1-5</a>, <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0" rel="nofollow">stable-diffusion-xl-base-1.0</a>, and the <a href="https://huggingface.co/segmind/SSD-1B" rel="nofollow">SSD-1B</a> model. All the checkpoints can be found in this <a href="https://huggingface.co/collections/latent-consistency/latent-consistency-models-loras-654cdd24e111e16f0865fba6" rel="nofollow">collection</a>.</p> <p data-svelte-h="svelte-1op1l2i">For more details about LCM-LoRA, refer to <a href="https://huggingface.co/papers/2311.05556" rel="nofollow">the technical report</a>.</p> <p data-svelte-h="svelte-1pmm8o2">This guide shows how to perform inference with LCM-LoRAs for</p> <ul data-svelte-h="svelte-1utl29a"><li>text-to-image</li> <li>image-to-image</li> <li>combined with styled LoRAs</li> <li>ControlNet/T2I-Adapter</li> <li>inpainting</li> <li>AnimateDiff</li></ul> <p data-svelte-h="svelte-1l89nj1">Before going through this guide, we’ll take a look at the general workflow for performing inference with LCM-LoRAs. | |
| LCM-LoRAs are similar to other Stable Diffusion LoRAs so they can be used with any <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> that supports LoRAs.</p> <ul data-svelte-h="svelte-1or8ebm"><li>Load the task specific pipeline and model.</li> <li>Set the scheduler to <a href="/docs/diffusers/main/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a>.</li> <li>Load the LCM-LoRA weights for the model.</li> <li>Reduce the <code>guidance_scale</code> between <code>[1.0, 2.0]</code> and set the <code>num_inference_steps</code> between [4, 8].</li> <li>Perform inference with the pipeline with the usual parameters.</li></ul> <p data-svelte-h="svelte-19h4utp">Let’s look at how we can perform inference with LCM-LoRAs for different tasks.</p> <p data-svelte-h="svelte-qd8sxg">First, make sure you have <a href="https://github.com/huggingface/peft" rel="nofollow">peft</a> installed, for better LoRA support.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 
w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U peft<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h2> <p data-svelte-h="svelte-10c0x65">You’ll use the <a href="/docs/diffusers/main/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline">StableDiffusionXLPipeline</a> with the scheduler: <a href="/docs/diffusers/main/en/api/schedulers/lcm#diffusers.LCMScheduler">LCMScheduler</a> and then load the LCM-LoRA. 
Together with the LCM-LoRA and the scheduler, the pipeline enables a fast inference workflow overcoming the slow iterative nature of diffusion models.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler | |
| pipe = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdxl"</span>) | |
| prompt = <span class="hljs-string">"Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">42</span>) | |
| image = pipe( | |
| prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">1.0</span> | |
).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zmq986"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2i.png"></p> <p data-svelte-h="svelte-n61tgz">Notice that we use only 4 steps for generation which is way less than what’s typically used for standard SDXL.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-x2r697">You may have noticed that we set <code>guidance_scale=1.0</code>, which disables classifier-free-guidance. This is because the LCM-LoRA is trained with guidance, so the batch size does not have to be doubled in this case. This leads to a faster inference time, with the drawback that negative prompts don’t have any effect on the denoising process.</p> <p data-svelte-h="svelte-1sl97s7">You can also use guidance with LCM-LoRA, but due to the nature of training the model is very sensitive to the <code>guidance_scale</code> values, high values can lead to artifacts in the generated images. 
In our experiments, we found that the best values are in the range of [1.0, 2.0].</p></div> <h3 class="relative group"><a id="inference-with-a-fine-tuned-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-with-a-fine-tuned-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference with a fine-tuned model</span></h3> <p data-svelte-h="svelte-15rr98x">As mentioned above, the LCM-LoRA can be applied to any fine-tuned version of the model without having to distill them separately. Let’s look at how we can perform inference with a fine-tuned model. 
In this example, we’ll use the <a href="https://huggingface.co/Linaqruf/animagine-xl" rel="nofollow">animagine-xl</a> model, which is a fine-tuned version of the SDXL model for generating anime.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler | |
| pipe = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"Linaqruf/animagine-xl"</span>, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdxl"</span>) | |
| prompt = <span class="hljs-string">"face focus, cute, masterpiece, best quality, 1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, night, turtleneck"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe( | |
| prompt=prompt, num_inference_steps=<span class="hljs-number">4</span>, generator=generator, guidance_scale=<span class="hljs-number">1.0</span> | |
| ).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19vr4c5"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2i_finetuned.png"></p> <h2 class="relative group"><a id="image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image-to-image</span></h2> <p data-svelte-h="svelte-2suklx">LCM-LoRA can be applied to image-to-image tasks too. Let’s look at how we can perform image-to-image generation with LCMs. 
For this example we’ll use the <a href="https://huggingface.co/Lykon/dreamshaper-7" rel="nofollow">dreamshaper-7</a> model and the LCM-LoRA for <code>stable-diffusion-v1-5 </code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForImage2Image, LCMScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid, load_image | |
| pipe = AutoPipelineForImage2Image.from_pretrained( | |
| <span class="hljs-string">"Lykon/dreamshaper-7"</span>, | |
| torch_dtype=torch.float16, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdv1-5"</span>) | |
| <span class="hljs-comment"># prepare image</span> | |
| url = <span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png"</span> | |
| init_image = load_image(url) | |
| prompt = <span class="hljs-string">"Astronauts in a jungle, cold color palette, muted colors, detailed, 8k"</span> | |
| <span class="hljs-comment"># pass prompt and image to pipeline</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe( | |
| prompt, | |
| image=init_image, | |
| num_inference_steps=<span class="hljs-number">4</span>, | |
| guidance_scale=<span class="hljs-number">1</span>, | |
| strength=<span class="hljs-number">0.6</span>, | |
| generator=generator | |
| ).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zt9d3e"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_i2i.png"></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-18pswq">You can get different results based on your prompt and the image you provide. To get the best results, we recommend trying different values for <code>num_inference_steps</code>, <code>strength</code>, and <code>guidance_scale</code> parameters and choose the best one.</p></div> <h2 class="relative group"><a id="combine-with-styled-loras" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#combine-with-styled-loras"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Combine with styled LoRAs</span></h2> <p data-svelte-h="svelte-1ueo85e">LCM-LoRA can be combined 
with other LoRAs to generate styled images in very few steps (4-8). In the following example, we’ll use the LCM-LoRA with the <a href="https://huggingface.co/TheLastBen/Papercut_SDXL" rel="nofollow">papercut LoRA</a>. | |
| To learn more about how to combine LoRAs, refer to <a href="https://huggingface.co/docs/diffusers/tutorials/using_peft_for_inference#combine-multiple-adapters" rel="nofollow">this guide</a>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, LCMScheduler | |
| pipe = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LoRAs</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdxl"</span>, adapter_name=<span class="hljs-string">"lcm"</span>) | |
| pipe.load_lora_weights(<span class="hljs-string">"TheLastBen/Papercut_SDXL"</span>, weight_name=<span class="hljs-string">"papercut.safetensors"</span>, adapter_name=<span class="hljs-string">"papercut"</span>) | |
| <span class="hljs-comment"># Combine LoRAs</span> | |
| pipe.set_adapters([<span class="hljs-string">"lcm"</span>, <span class="hljs-string">"papercut"</span>], adapter_weights=[<span class="hljs-number">1.0</span>, <span class="hljs-number">0.8</span>]) | |
| prompt = <span class="hljs-string">"papercut, a cute fox"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe(prompt, num_inference_steps=<span class="hljs-number">4</span>, guidance_scale=<span class="hljs-number">1</span>, generator=generator).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9tzi4"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdx_lora_mix.png"></p> <h2 class="relative group"><a id="controlnett2i-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnett2i-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet/T2I-Adapter</span></h2> <p data-svelte-h="svelte-db61xh">Let’s look at how we can perform inference with ControlNet/T2I-Adapter and LCM-LoRA.</p> <h3 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 
28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h3> <p data-svelte-h="svelte-1cso8xy">For this example, we’ll use the SD-v1-5 model and the LCM-LoRA for SD-v1-5 with canny ControlNet.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> cv2 | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| image = load_image( | |
| <span class="hljs-string">"https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"</span> | |
| ).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)) | |
| image = np.array(image) | |
| low_threshold = <span class="hljs-number">100</span> | |
| high_threshold = <span class="hljs-number">200</span> | |
| image = cv2.Canny(image, low_threshold, high_threshold) | |
| image = image[:, :, <span class="hljs-literal">None</span>] | |
| image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>) | |
| canny_image = Image.fromarray(image) | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| controlnet=controlnet, | |
| torch_dtype=torch.float16, | |
| safety_checker=<span class="hljs-literal">None</span>, | |
| variant=<span class="hljs-string">"fp16"</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdv1-5"</span>) | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe( | |
| <span class="hljs-string">"the mona lisa"</span>, | |
| image=canny_image, | |
| num_inference_steps=<span class="hljs-number">4</span>, | |
| guidance_scale=<span class="hljs-number">1.5</span>, | |
| controlnet_conditioning_scale=<span class="hljs-number">0.8</span>, | |
| cross_attention_kwargs={<span class="hljs-string">"scale"</span>: <span class="hljs-number">1</span>}, | |
| generator=generator, | |
| ).images[<span class="hljs-number">0</span>] | |
| make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11lx81y"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_controlnet.png"></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">The inference parameters in this example might not work for all examples, so we recommend trying different values for `num_inference_steps`, `guidance_scale`, `controlnet_conditioning_scale` and `cross_attention_kwargs` parameters and choose the best one.</div> <h3 class="relative group"><a id="t2i-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t2i-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T2I-Adapter</span></h3> <p data-svelte-h="svelte-5l31pr">This example shows how to use the LCM-LoRA with the <a href="https://huggingface.co/TencentARC/t2i-adapter-canny-sdxl-1.0">Canny 
T2I-Adapter</a> and SDXL.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> cv2 | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLAdapterPipeline, T2IAdapter, LCMScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| <span class="hljs-comment"># Prepare image</span> | |
| <span class="hljs-comment"># Detect the canny map in low resolution to avoid high-frequency details</span> | |
| image = load_image( | |
| <span class="hljs-string">"https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_canny.jpg"</span> | |
| ).resize((<span class="hljs-number">384</span>, <span class="hljs-number">384</span>)) | |
| image = np.array(image) | |
| low_threshold = <span class="hljs-number">100</span> | |
| high_threshold = <span class="hljs-number">200</span> | |
| image = cv2.Canny(image, low_threshold, high_threshold) | |
| image = image[:, :, <span class="hljs-literal">None</span>] | |
| image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>) | |
| canny_image = Image.fromarray(image).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">1024</span>)) | |
| <span class="hljs-comment"># load adapter</span> | |
| adapter = T2IAdapter.from_pretrained(<span class="hljs-string">"TencentARC/t2i-adapter-canny-sdxl-1.0"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>).to(<span class="hljs-string">"cuda"</span>) | |
| pipe = StableDiffusionXLAdapterPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| adapter=adapter, | |
| torch_dtype=torch.float16, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdxl"</span>) | |
| prompt = <span class="hljs-string">"Mystical fairy in real, magic, 4k picture, high quality"</span> | |
| negative_prompt = <span class="hljs-string">"extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| image=canny_image, | |
| num_inference_steps=<span class="hljs-number">4</span>, | |
| guidance_scale=<span class="hljs-number">1.5</span>, | |
| adapter_conditioning_scale=<span class="hljs-number">0.8</span>, | |
| adapter_conditioning_factor=<span class="hljs-number">1</span>, | |
| generator=generator, | |
| ).images[<span class="hljs-number">0</span>] | |
| make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-iuzcjf"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdxl_t2iadapter.png"></p> <h2 class="relative group"><a id="inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpainting</span></h2> <p data-svelte-h="svelte-fhcmt1">LCM-LoRA can be used for inpainting as well.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting, LCMScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipe = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, | |
| torch_dtype=torch.float16, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdv1-5"</span>) | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| <span class="hljs-comment"># generator = torch.Generator("cuda").manual_seed(92)</span> | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe( | |
| prompt=prompt, | |
| image=init_image, | |
| mask_image=mask_image, | |
| generator=generator, | |
| num_inference_steps=<span class="hljs-number">4</span>, | |
| guidance_scale=<span class="hljs-number">4</span>, | |
| ).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t65w1d"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_inpainting.png"></p> <h2 class="relative group"><a id="animatediff" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#animatediff"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AnimateDiff</span></h2> <p data-svelte-h="svelte-13othel"><code>AnimateDiff</code> allows you to animate images using Stable Diffusion models. To get good results, we need to generate multiple frames (16-24), and doing this with standard SD models can be very slow. | |
| LCM-LoRA can be used to speed up the process significantly, as you just need to do 4-8 steps for each frame. Let’s look at how we can perform animation with LCM-LoRA and AnimateDiff.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> MotionAdapter, AnimateDiffPipeline, DDIMScheduler, LCMScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_gif | |
| adapter = MotionAdapter.from_pretrained(<span class="hljs-string">"diffusers/animatediff-motion-adapter-v1-5"</span>) | |
| pipe = AnimateDiffPipeline.from_pretrained( | |
| <span class="hljs-string">"frankjoshua/toonyou_beta6"</span>, | |
| motion_adapter=adapter, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># set scheduler</span> | |
| pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># load LCM-LoRA</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"latent-consistency/lcm-lora-sdv1-5"</span>, adapter_name=<span class="hljs-string">"lcm"</span>) | |
| pipe.load_lora_weights(<span class="hljs-string">"guoyww/animatediff-motion-lora-zoom-in"</span>, weight_name=<span class="hljs-string">"diffusion_pytorch_model.safetensors"</span>, adapter_name=<span class="hljs-string">"motion-lora"</span>) | |
| pipe.set_adapters([<span class="hljs-string">"lcm"</span>, <span class="hljs-string">"motion-lora"</span>], adapter_weights=[<span class="hljs-number">0.55</span>, <span class="hljs-number">1.2</span>]) | |
| prompt = <span class="hljs-string">"best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| frames = pipe( | |
| prompt=prompt, | |
| num_inference_steps=<span class="hljs-number">5</span>, | |
| guidance_scale=<span class="hljs-number">1.25</span>, | |
| cross_attention_kwargs={<span class="hljs-string">"scale"</span>: <span class="hljs-number">1</span>}, | |
| num_frames=<span class="hljs-number">24</span>, | |
| generator=generator | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_gif(frames, <span class="hljs-string">"animation.gif"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zceinn"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lcm/lcm_sdv1-5_animatediff.gif"></p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/inference_with_lcm_lora.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1j8f0t4 = { | |
| assets: "/docs/diffusers/main/en", | |
| base: "/docs/diffusers/main/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/main/en/_app/immutable/entry/start.dea43253.js"), | |
| import("/docs/diffusers/main/en/_app/immutable/entry/app.1d224620.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 176], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 53.4 kB
- Xet hash:
- ebb39e2c7c0ab7af36297846f7c3aba2db7433d4e4ef0ef371dbeaa69c28d309
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.