Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"Inpainting","local":"inpainting","sections":[{"title":"Create a mask image","local":"create-a-mask-image","sections":[{"title":"Mask blur","local":"mask-blur","sections":[],"depth":3}],"depth":2},{"title":"Popular models","local":"popular-models","sections":[{"title":"Stable Diffusion Inpainting","local":"stable-diffusion-inpainting","sections":[],"depth":3},{"title":"Stable Diffusion XL (SDXL) Inpainting","local":"stable-diffusion-xl-sdxl-inpainting","sections":[],"depth":3},{"title":"Kandinsky 2.2 Inpainting","local":"kandinsky-22-inpainting","sections":[],"depth":3}],"depth":2},{"title":"Non-inpaint specific checkpoints","local":"non-inpaint-specific-checkpoints","sections":[],"depth":2},{"title":"Configure pipeline parameters","local":"configure-pipeline-parameters","sections":[{"title":"Strength","local":"strength","sections":[],"depth":3},{"title":"Guidance scale","local":"guidance-scale","sections":[],"depth":3},{"title":"Negative prompt","local":"negative-prompt","sections":[],"depth":3},{"title":"Padding mask crop","local":"padding-mask-crop","sections":[],"depth":3}],"depth":2},{"title":"Chained inpainting pipelines","local":"chained-inpainting-pipelines","sections":[{"title":"Text-to-image-to-inpaint","local":"text-to-image-to-inpaint","sections":[],"depth":3},{"title":"Inpaint-to-image-to-image","local":"inpaint-to-image-to-image","sections":[],"depth":3}],"depth":2},{"title":"Control image generation","local":"control-image-generation","sections":[{"title":"Prompt weighting","local":"prompt-weighting","sections":[],"depth":3},{"title":"ControlNet","local":"controlnet","sections":[],"depth":3}],"depth":2},{"title":"Optimize","local":"optimize","sections":[],"depth":2}],"depth":1}'> | |
| <link href="/docs/diffusers/pr_10312/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/entry/start.203b6290.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/scheduler.8c3d61f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/singletons.8b179f45.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/index.0997d446.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/paths.3fd58d56.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/entry/app.423ea23f.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/index.da70eac4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/nodes/0.e544eae6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/nodes/246.5d7d0332.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/Tip.1d9b8c37.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/CodeBlock.00a903b3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/DocNotebookDropdown.02900f6b.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/EditOnGithub.1e64e623.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/HfOption.c1483eb1.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10312/en/_app/immutable/chunks/stores.d6eecc38.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content='{"title":"Inpainting","local":"inpainting","sections":[{"title":"Create a mask image","local":"create-a-mask-image","sections":[{"title":"Mask blur","local":"mask-blur","sections":[],"depth":3}],"depth":2},{"title":"Popular models","local":"popular-models","sections":[{"title":"Stable Diffusion Inpainting","local":"stable-diffusion-inpainting","sections":[],"depth":3},{"title":"Stable Diffusion XL (SDXL) Inpainting","local":"stable-diffusion-xl-sdxl-inpainting","sections":[],"depth":3},{"title":"Kandinsky 2.2 Inpainting","local":"kandinsky-22-inpainting","sections":[],"depth":3}],"depth":2},{"title":"Non-inpaint specific checkpoints","local":"non-inpaint-specific-checkpoints","sections":[],"depth":2},{"title":"Configure pipeline parameters","local":"configure-pipeline-parameters","sections":[{"title":"Strength","local":"strength","sections":[],"depth":3},{"title":"Guidance scale","local":"guidance-scale","sections":[],"depth":3},{"title":"Negative prompt","local":"negative-prompt","sections":[],"depth":3},{"title":"Padding mask crop","local":"padding-mask-crop","sections":[],"depth":3}],"depth":2},{"title":"Chained inpainting pipelines","local":"chained-inpainting-pipelines","sections":[{"title":"Text-to-image-to-inpaint","local":"text-to-image-to-inpaint","sections":[],"depth":3},{"title":"Inpaint-to-image-to-image","local":"inpaint-to-image-to-image","sections":[],"depth":3}],"depth":2},{"title":"Control image generation","local":"control-image-generation","sections":[{"title":"Prompt weighting","local":"prompt-weighting","sections":[],"depth":3},{"title":"ControlNet","local":"controlnet","sections":[],"depth":3}],"depth":2},{"title":"Optimize","local":"optimize","sections":[],"depth":2}],"depth":1}'><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="inpainting" 
class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpainting</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-1vjeoqc">Inpainting replaces or edits specific areas of an image. This makes it a useful tool for image restoration like removing defects and artifacts, or even replacing an image area with something entirely new. Inpainting relies on a mask to determine which regions of an image to fill in; the area to inpaint is represented by white pixels and the area to keep is represented by black pixels. 
The white pixels are filled in by the prompt.</p> <p data-svelte-h="svelte-1t7j32f">With 🤗 Diffusers, here is how you can do inpainting:</p> <ol data-svelte-h="svelte-jam3iz"><li>Load an inpainting checkpoint with the <a href="/docs/diffusers/pr_10312/en/api/pipelines/auto_pipeline#diffusers.AutoPipelineForInpainting">AutoPipelineForInpainting</a> class. This’ll automatically detect the appropriate pipeline class to load based on the checkpoint:</li></ol> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-decoder-inpaint"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention()<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-qps3zw">You’ll notice that throughout the guide, we use <a href="/docs/diffusers/pr_10312/en/api/pipelines/overview#diffusers.DiffusionPipeline.enable_model_cpu_offload">enable_model_cpu_offload()</a> and <a href="/docs/diffusers/pr_10312/en/api/pipelines/overview#diffusers.DiffusionPipeline.enable_xformers_memory_efficient_attention">enable_xformers_memory_efficient_attention()</a> to save memory and increase inference speed. If you’re using PyTorch 2.0, it’s not necessary to call <a href="/docs/diffusers/pr_10312/en/api/pipelines/overview#diffusers.DiffusionPipeline.enable_xformers_memory_efficient_attention">enable_xformers_memory_efficient_attention()</a> on your pipeline because it’ll already be using PyTorch 2.0’s native <a href="../optimization/torch2.0#scaled-dot-product-attention">scaled dot product attention</a>.</p></div> <ol start="2" data-svelte-h="svelte-1jtpcib"><li>Load the base and mask images:</li></ol> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" 
height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>)<!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-1przvba"><li>Create a prompt to inpaint the image with and pass it to the pipeline with the base and mask images:</li></ol> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a black cat with glowing eyes, cute, adorable, disney, pixar, highly detailed, 8k"</span> | |
| negative_prompt = <span class="hljs-string">"bad anatomy, deformed, ugly, disfigured"</span> | |
| image = pipeline(prompt=prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-72kdzo"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">base image</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">mask image</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-cat.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption></div></div> <h2 class="relative group"><a id="create-a-mask-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-a-mask-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create a mask image</span></h2> <p 
data-svelte-h="svelte-ic55ez">Throughout this guide, the mask image is provided in all of the code examples for convenience. You can inpaint on your own images, but you’ll need to create a mask image for each of them. Use the Space below to easily create a mask image.</p> <p data-svelte-h="svelte-87gmw5">Upload a base image to inpaint on and use the sketch tool to draw a mask. Once you’re done, click <strong>Run</strong> to generate and download the mask image.</p> <iframe src="https://stevhliu-inpaint-mask-maker.hf.space" frameborder="0" width="850" height="450"></iframe> <h3 class="relative group"><a id="mask-blur" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mask-blur"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Mask blur</span></h3> <p data-svelte-h="svelte-1oce9nc">The <code>VaeImageProcessor.blur</code> method provides an option for how to blend the original image and inpaint area. The amount of blur is determined by the <code>blur_factor</code> parameter. Increasing the <code>blur_factor</code> increases the amount of blur applied to the mask edges, softening the transition between the original image and inpaint area. 
A low or zero <code>blur_factor</code> preserves the sharper edges of the mask.</p> <p data-svelte-h="svelte-94ubh1">To use this, create a blurred mask with the image processor.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| pipeline = AutoPipelineForInpainting.from_pretrained(<span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">'cuda'</span>) | |
| mask = load_image(<span class="hljs-string">"https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore_mask.png"</span>) | |
| blurred_mask = pipeline.mask_processor.blur(mask, blur_factor=<span class="hljs-number">33</span>) | |
| blurred_mask<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-8eokiu"><div><img class="rounded-xl" src="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore_mask.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">mask with no blur</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/mask_blurred.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">mask with blur applied</figcaption></div></div> <h2 class="relative group"><a id="popular-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#popular-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Popular models</span></h2> <p data-svelte-h="svelte-5vkfn3"><a href="https://huggingface.co/runwayml/stable-diffusion-inpainting" rel="nofollow">Stable Diffusion Inpainting</a>, <a href="https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1" rel="nofollow">Stable Diffusion XL (SDXL) Inpainting</a>, and <a href="https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder-inpaint" 
rel="nofollow">Kandinsky 2.2 Inpainting</a> are among the most popular models for inpainting. SDXL typically produces higher-resolution images than Stable Diffusion v1.5, and Kandinsky 2.2 is also capable of generating high-quality images.</p> <h3 class="relative group"><a id="stable-diffusion-inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion Inpainting</span></h3> <p data-svelte-h="svelte-siyv04">Stable Diffusion Inpainting is a latent diffusion model finetuned for inpainting on 512x512 images. It is a good starting point because it is relatively fast and generates good-quality images. 
To use this model for inpainting, you’ll need to pass a prompt, base and mask image to the pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| generator = torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">92</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, generator=generator).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="stable-diffusion-xl-sdxl-inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl-sdxl-inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL (SDXL) Inpainting</span></h3> <p data-svelte-h="svelte-1grw9dr">SDXL is a larger and more powerful version of Stable Diffusion v1.5. This model can follow a two-stage model process (though each model can also be used alone); the base model generates an image, and a refiner model takes that image and further enhances its details and quality. 
Take a look at the <a href="sdxl">SDXL</a> guide for a more comprehensive overview of how to use SDXL and configure its parameters.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"diffusers/stable-diffusion-xl-1.0-inpainting-0.1"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| generator = torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">92</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, generator=generator).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="kandinsky-22-inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kandinsky-22-inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kandinsky 2.2 Inpainting</span></h3> <p data-svelte-h="svelte-1k8fkye">The Kandinsky model family is similar to SDXL because it uses two models as well; the image prior model creates image embeddings, and the diffusion model generates images from them. 
You can load the image prior and diffusion model separately, but the easiest way to use Kandinsky 2.2 is to load it into the <a href="/docs/diffusers/pr_10312/en/api/pipelines/auto_pipeline#diffusers.AutoPipelineForInpainting">AutoPipelineForInpainting</a> class which uses the <a href="/docs/diffusers/pr_10312/en/api/pipelines/kandinsky_v22#diffusers.KandinskyV22InpaintCombinedPipeline">KandinskyV22InpaintCombinedPipeline</a> under the hood.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-decoder-inpaint"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| generator = torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">92</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, generator=generator).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-xc2o0t"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">base image</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-sdv1.5.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">Stable Diffusion Inpainting</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-sdxl.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">Stable Diffusion XL Inpainting</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-kandinsky.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">Kandinsky 2.2 Inpainting</figcaption></div></div> <h2 class="relative group"><a id="non-inpaint-specific-checkpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#non-inpaint-specific-checkpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Non-inpaint specific checkpoints</span></h2> <p data-svelte-h="svelte-1f7rg9f">So far, this guide has used inpaint specific checkpoints such as <a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting" rel="nofollow">stable-diffusion-v1-5/stable-diffusion-inpainting</a>. But you can also use regular checkpoints like <a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5" rel="nofollow">stable-diffusion-v1-5/stable-diffusion-v1-5</a>. Let’s compare the results of the two checkpoints.</p> <p data-svelte-h="svelte-wgq5zt">The image on the left is generated from a regular checkpoint, and the image on the right is from an inpaint checkpoint. You’ll immediately notice the image on the left is not as clean, and you can still see the outline of the area the model is supposed to inpaint. 
The image on the right is much cleaner and the inpainted area appears more natural.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">stable-diffusion-v1-5/stable-diffusion-v1-5 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">runwayml/stable-diffusion-inpainting </div></div> <div class="language-select"><div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| generator = torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">92</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, generator=generator).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> </div> <div class="flex gap-4" data-svelte-h="svelte-5lzd2z"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/non-inpaint-specific.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">stable-diffusion-v1-5/stable-diffusion-v1-5</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-specific.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">runwayml/stable-diffusion-inpainting</figcaption></div></div> <p data-svelte-h="svelte-2plak">However, for more basic tasks like erasing an object from an image (like the rocks in the road for example), a regular checkpoint yields pretty good results. There isn’t as noticeable a difference between the regular and inpaint checkpoints.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">stable-diffusion-v1-5/stable-diffusion-v1-5 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">runwayml/stable-diffusion-inpainting </div></div> <div class="language-select"><div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/road-mask.png"</span>) | |
| image = pipeline(prompt=<span class="hljs-string">"road"</span>, image=init_image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> </div> <div class="flex gap-4" data-svelte-h="svelte-kg15p1"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/regular-inpaint-basic.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">stable-diffusion-v1-5/stable-diffusion-v1-5</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/specific-inpaint-basic.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">runwayml/stable-diffusion-inpainting</figcaption></div></div> <p data-svelte-h="svelte-duydyd">The trade-off of using a non-inpaint specific checkpoint is the overall image quality may be lower, but it generally tends to preserve the unmasked area (that is why you can see the mask outline). The inpaint specific checkpoints are intentionally trained to generate higher quality inpainted images, and that includes creating a more natural transition between the masked and unmasked areas. 
As a result, these checkpoints are more likely to change your unmasked area.</p> <p data-svelte-h="svelte-tdq62e">If preserving the unmasked area is important for your task, you can use the <code>VaeImageProcessor.apply_overlay</code> method to force the unmasked area of an image to remain the same at the expense of some more unnatural transitions between the masked and unmasked areas.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> PIL | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| device = <span class="hljs-string">"cuda"</span> | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipeline = pipeline.to(device) | |
| img_url = <span class="hljs-string">"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"</span> | |
| mask_url = <span class="hljs-string">"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"</span> | |
| init_image = load_image(img_url).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)) | |
| mask_image = load_image(mask_url).resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)) | |
| prompt = <span class="hljs-string">"Face of a yellow cat, high resolution, sitting on a park bench"</span> | |
| repainted_image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| repainted_image.save(<span class="hljs-string">"repainted_image.png"</span>) | |
| unmasked_unchanged_image = pipeline.image_processor.apply_overlay(mask_image, init_image, repainted_image) | |
| unmasked_unchanged_image.save(<span class="hljs-string">"force_unmasked_unchanged.png"</span>) | |
| make_image_grid([init_image, mask_image, repainted_image, unmasked_unchanged_image], rows=<span class="hljs-number">2</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="configure-pipeline-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configure-pipeline-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configure pipeline parameters</span></h2> <p data-svelte-h="svelte-13ocbdc">Image features - like quality and “creativity” - are dependent on pipeline parameters. Knowing what these parameters do is important for getting the results you want. 
Let’s take a look at the most important parameters and see how changing them affects the output.</p> <h3 class="relative group"><a id="strength" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#strength"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Strength</span></h3> <p data-svelte-h="svelte-qcb5md"><code>strength</code> is a measure of how much noise is added to the base image, which influences how similar the output is to the base image.</p> <ul data-svelte-h="svelte-vvcg88"><li>📈 a high <code>strength</code> value means more noise is added to an image and the denoising process takes longer, but you’ll get higher quality images that are more different from the base image</li> <li>📉 a low <code>strength</code> value means less noise is added to an image and the denoising process is faster, but the image quality may not be as great and the generated image resembles the base image more</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 
mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, strength=<span class="hljs-number">0.6</span>).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-1vmqp5s"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-strength-0.6.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">strength = 0.6</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-strength-0.8.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">strength = 0.8</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-strength-1.0.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">strength = 1.0</figcaption></div></div> <h3 class="relative group"><a id="guidance-scale" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#guidance-scale"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Guidance scale</span></h3> <p data-svelte-h="svelte-zcb81p"><code>guidance_scale</code> affects how aligned the text prompt and generated image are.</p> <ul data-svelte-h="svelte-cnggx4"><li>📈 a high <code>guidance_scale</code> value means the prompt and generated image are closely aligned, so the output is a stricter interpretation of the prompt</li> <li>📉 a low <code>guidance_scale</code> value means the prompt and generated image are more loosely aligned, so the output may be more varied from the prompt</li></ul> <p data-svelte-h="svelte-196id8d">You can use <code>strength</code> and <code>guidance_scale</code> together for more control over how expressive the model is. For example, a combination of high <code>strength</code> and <code>guidance_scale</code> values gives the model the most creative freedom.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: 
transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, guidance_scale=<span class="hljs-number">2.5</span>).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-1cwhpu7"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-guidance-2.5.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">guidance_scale = 2.5</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-guidance-7.5.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">guidance_scale = 7.5</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-guidance-12.5.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">guidance_scale = 12.5</figcaption></div></div> <h3 class="relative group"><a id="negative-prompt" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#negative-prompt"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Negative prompt</span></h3> <p data-svelte-h="svelte-1ru9kar">A negative prompt assumes the opposite role of a prompt; it guides the model away from generating certain things in an image. This is useful for quickly improving image quality and preventing the model from generating things you don’t want.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| negative_prompt = <span class="hljs-string">"bad architecture, unstable, poor details, blurry"</span> | |
| image = pipeline(prompt=prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-15bd4ta"><figure><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-negative.png"> <figcaption class="text-center">negative_prompt = "bad architecture, unstable, poor details, blurry"</figcaption></figure></div> <h3 class="relative group"><a id="padding-mask-crop" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#padding-mask-crop"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Padding mask crop</span></h3> <p data-svelte-h="svelte-4xqczw">A method for increasing the inpainting image quality is to use the <a href="https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.__call__.padding_mask_crop" rel="nofollow"><code>padding_mask_crop</code></a> parameter. 
When enabled, this option crops the masked area with some user-specified padding, and crops the same area from the original image. Both the cropped image and mask are upscaled to a higher resolution for inpainting, and the inpainted result is then overlaid on the original image. This is a quick and easy way to improve image quality without using a separate pipeline like <a href="/docs/diffusers/pr_10312/en/api/pipelines/stable_diffusion/upscale#diffusers.StableDiffusionUpscalePipeline">StableDiffusionUpscalePipeline</a>.</p> <p data-svelte-h="svelte-1kzwzp9">Add the <code>padding_mask_crop</code> parameter to the pipeline call and set it to the desired padding value.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| generator = torch.Generator(device=<span class="hljs-string">'cuda'</span>).manual_seed(<span class="hljs-number">0</span>) | |
| pipeline = AutoPipelineForInpainting.from_pretrained(<span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">'cuda'</span>) | |
| base = load_image(<span class="hljs-string">"https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"</span>) | |
| mask = load_image(<span class="hljs-string">"https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore_mask.png"</span>) | |
| image = pipeline(<span class="hljs-string">"boat"</span>, image=base, mask_image=mask, strength=<span class="hljs-number">0.75</span>, generator=generator, padding_mask_crop=<span class="hljs-number">32</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-107i7jo"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/baseline_inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">default inpaint image</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/padding_mask_crop_inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">inpaint image with <code>padding_mask_crop</code> enabled</figcaption></div></div> <h2 class="relative group"><a id="chained-inpainting-pipelines" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chained-inpainting-pipelines"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chained inpainting pipelines</span></h2> <p data-svelte-h="svelte-7i70jn"><a href="/docs/diffusers/pr_10312/en/api/pipelines/auto_pipeline#diffusers.AutoPipelineForInpainting">AutoPipelineForInpainting</a> can be chained with other 🤗 Diffusers pipelines to edit their outputs. 
This is often useful for improving the output quality from your other diffusion pipelines, and if you’re using multiple pipelines, it can be more memory-efficient to chain them together to keep the outputs in latent space and reuse the same pipeline components.</p> <h3 class="relative group"><a id="text-to-image-to-inpaint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image-to-inpaint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image-to-inpaint</span></h3> <p data-svelte-h="svelte-1uqw6jr">Chaining a text-to-image and inpainting pipeline allows you to inpaint the generated image, and you don’t have to provide a base image to begin with. 
This makes it convenient to edit your favorite text-to-image outputs without having to generate an entirely new image.</p> <p data-svelte-h="svelte-1kzp8f0">Start with the text-to-image pipeline to create a castle:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForText2Image, AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForText2Image.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| text2image = pipeline(<span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span>).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-yqefj6">Load the mask image of the output from above:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_text-chain-mask.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ua49j1">And let’s inpaint the masked area with a waterfall:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer 
focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-decoder-inpaint"</span>, torch_dtype=torch.float16 | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| prompt = <span class="hljs-string">"digital painting of a fantasy waterfall, cloudy"</span> | |
| image = pipeline(prompt=prompt, image=text2image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([text2image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-15ens2s"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-text-chain.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">text-to-image</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-text-chain-out.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">inpaint</figcaption></div></div> <h3 class="relative group"><a id="inpaint-to-image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpaint-to-image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpaint-to-image-to-image</span></h3> <p data-svelte-h="svelte-16qmg1v">You can also chain an inpainting pipeline before another pipeline like image-to-image or an upscaler to improve the quality.</p> <p 
data-svelte-h="svelte-1du3njh">Begin by inpainting an image:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting, AutoPipelineForImage2Image | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image_inpainting = pipeline(prompt=prompt, image=init_image, mask_image=mask_image).images[<span class="hljs-number">0</span>] | |
| <span class="hljs-comment"># resize image to 1024x1024 for SDXL</span> | |
| image_inpainting = image_inpainting.resize((<span class="hljs-number">1024</span>, <span class="hljs-number">1024</span>))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-j4alid">Now let’s pass the image to another inpainting pipeline with SDXL’s refiner model to enhance the image details and quality:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-refiner-1.0"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| image = pipeline(prompt=prompt, image=image_inpainting, mask_image=mask_image, output_type=<span class="hljs-string">"latent"</span>).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1mb2wkz">It is important to specify <code>output_type="latent"</code> in the pipeline to keep all the outputs in latent space to avoid an unnecessary decode-encode step. This only works if the chained pipelines are using the same VAE. For example, in the <a href="#text-to-image-to-inpaint">Text-to-image-to-inpaint</a> section, Kandinsky 2.2 uses a different VAE class than the Stable Diffusion model so it won’t work. But if you use Stable Diffusion v1.5 for both pipelines, then you can keep everything in latent space because they both use <a href="/docs/diffusers/pr_10312/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>.</p></div> <p data-svelte-h="svelte-1wqr07d">Finally, you can pass this image to an image-to-image pipeline to put the finishing touches on it. 
It is more efficient to use the <a href="/docs/diffusers/pr_10312/en/api/pipelines/auto_pipeline#diffusers.AutoPipelineForImage2Image.from_pipe">from_pipe()</a> method to reuse the existing pipeline components, and avoid unnecessarily loading all the pipeline components into memory again.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = AutoPipelineForImage2Image.from_pipe(pipeline) | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| image = pipeline(prompt=prompt, image=image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image_inpainting, image], rows=<span class="hljs-number">2</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-1lbnv8n"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-to-image-chain.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">inpaint</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-to-image-final.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">image-to-image</figcaption></div></div> <p data-svelte-h="svelte-1fd5b22">Image-to-image and inpainting are actually very similar tasks. Image-to-image generates a new image that resembles the existing provided image. Inpainting does the same thing, but it only transforms the image area defined by the mask and the rest of the image is unchanged. 
You can think of inpainting as a more precise tool for making specific changes, while image-to-image has a broader scope for making more sweeping changes.</p> <h2 class="relative group"><a id="control-image-generation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#control-image-generation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Control image generation</span></h2> <p data-svelte-h="svelte-1gc804h">Getting an image to look exactly the way you want is challenging because the denoising process is random. 
While you can control certain aspects of generation by configuring parameters like <code>negative_prompt</code>, there are better and more efficient methods for controlling image generation.</p> <h3 class="relative group"><a id="prompt-weighting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-weighting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt weighting</span></h3> <p data-svelte-h="svelte-6zcpc0">Prompt weighting provides a quantifiable way to scale the representation of concepts in a prompt. You can use it to increase or decrease the magnitude of the text embedding vector for each concept in the prompt, which subsequently determines how much of each concept is generated. The <a href="https://github.com/damian0815/compel" rel="nofollow">Compel</a> library offers an intuitive syntax for scaling the prompt weights and generating the embeddings. 
Learn how to create the embeddings in the <a href="../using-diffusers/weighted_prompts">Prompt weighting</a> guide.</p> <p data-svelte-h="svelte-140ngfk">Once you’ve generated the embeddings, pass them to the <code>prompt_embeds</code> (and <code>negative_prompt_embeds</code> if you’re using a negative prompt) parameter in the <a href="/docs/diffusers/pr_10312/en/api/pipelines/auto_pipeline#diffusers.AutoPipelineForInpainting">AutoPipelineForInpainting</a>. The embeddings replace the <code>prompt</code> parameter:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid | |
| pipeline = AutoPipelineForInpainting.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, torch_dtype=torch.float16, | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| image = pipeline(prompt_embeds=prompt_embeds, <span class="hljs-comment"># generated from Compel</span> | |
| negative_prompt_embeds=negative_prompt_embeds, <span class="hljs-comment"># generated from Compel</span> | |
| image=init_image, | |
| mask_image=mask_image | |
| ).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h3> <p data-svelte-h="svelte-3rypf9">ControlNet models are used with other diffusion models like Stable Diffusion, and they provide an even more flexible and accurate way to control how an image is generated. 
A ControlNet accepts an additional conditioning image input that guides the diffusion model to preserve the features in it.</p> <p data-svelte-h="svelte-1x7ee8u">For example, let’s condition an image with a ControlNet pretrained on inpaint images:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">import</span> PIL.Image | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ControlNetModel, StableDiffusionControlNetInpaintPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| <span class="hljs-comment"># load ControlNet</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/control_v11p_sd15_inpaint"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| <span class="hljs-comment"># pass ControlNet to the pipeline</span> | |
| pipeline = StableDiffusionControlNetInpaintPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-inpainting"</span>, controlnet=controlnet, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-comment"># load base and mask image</span> | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"</span>) | |
| mask_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png"</span>) | |
| <span class="hljs-comment"># prepare control image</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">make_inpaint_condition</span>(<span class="hljs-params">init_image, mask_image</span>): | |
| init_image = np.array(init_image.convert(<span class="hljs-string">"RGB"</span>)).astype(np.float32) / <span class="hljs-number">255.0</span> | |
| mask_image = np.array(mask_image.convert(<span class="hljs-string">"L"</span>)).astype(np.float32) / <span class="hljs-number">255.0</span> | |
| <span class="hljs-keyword">assert</span> init_image.shape[<span class="hljs-number">0</span>:<span class="hljs-number">2</span>] == mask_image.shape[<span class="hljs-number">0</span>:<span class="hljs-number">2</span>], <span class="hljs-string">"image and image_mask must have the same image size"</span> | |
| init_image[mask_image > <span class="hljs-number">0.5</span>] = -<span class="hljs-number">1.0</span> <span class="hljs-comment"># set as masked pixel</span> | |
| init_image = np.expand_dims(init_image, <span class="hljs-number">0</span>).transpose(<span class="hljs-number">0</span>, <span class="hljs-number">3</span>, <span class="hljs-number">1</span>, <span class="hljs-number">2</span>) | |
| init_image = torch.from_numpy(init_image) | |
| <span class="hljs-keyword">return</span> init_image | |
| control_image = make_inpaint_condition(init_image, mask_image)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-db3ja">Now generate an image from the base, mask and control images. You’ll notice features of the base image are strongly preserved in the generated image.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k"</span> | |
| image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, control_image=control_image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, PIL.Image.fromarray(np.uint8(control_image[<span class="hljs-number">0</span>][<span class="hljs-number">0</span>])).convert(<span class="hljs-string">'RGB'</span>), image], rows=<span class="hljs-number">2</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-iaoixt">You can take this a step further and chain it with an image-to-image pipeline to apply a new <a href="https://huggingface.co/nitrosocke/elden-ring-diffusion" rel="nofollow">style</a>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForImage2Image | |
| pipeline = AutoPipelineForImage2Image.from_pretrained( | |
| <span class="hljs-string">"nitrosocke/elden-ring-diffusion"</span>, torch_dtype=torch.float16, | |
| ) | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed</span> | |
| pipeline.enable_xformers_memory_efficient_attention() | |
| prompt = <span class="hljs-string">"elden ring style castle"</span> <span class="hljs-comment"># include the token "elden ring style" in the prompt</span> | |
| negative_prompt = <span class="hljs-string">"bad architecture, deformed, disfigured, poor details"</span> | |
| image_elden_ring = pipeline(prompt, negative_prompt=negative_prompt, image=image).images[<span class="hljs-number">0</span>] | |
| make_image_grid([init_image, mask_image, image, image_elden_ring], rows=<span class="hljs-number">2</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex flex-row gap-4" data-svelte-h="svelte-9rfwwm"><div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">initial image</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-controlnet.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">ControlNet inpaint</figcaption></div> <div class="flex-1"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint-img2img.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">image-to-image</figcaption></div></div> <h2 class="relative group"><a id="optimize" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#optimize"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Optimize</span></h2> <p data-svelte-h="svelte-1f04pnr">It can be difficult and slow to run diffusion models if you’re resource-constrained, but it doesn’t have to be with a few optimization tricks. One of the biggest (and easiest) optimizations you can enable is switching to memory-efficient attention. If you’re using PyTorch 2.0 or higher, <a href="../optimization/torch2.0#scaled-dot-product-attention">scaled dot-product attention</a> is automatically enabled and you don’t need to do anything else. If you’re using an older version of PyTorch, you can install and use <a href="../optimization/xformers">xFormers</a>’s implementation of memory-efficient attention. Both options reduce memory usage and accelerate inference.</p> <p data-svelte-h="svelte-1eqbc05">You can also offload the model to the CPU to save even more memory:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; 
"></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-addition">+ pipeline.enable_xformers_memory_efficient_attention()</span> | |
| <span class="hljs-addition">+ pipeline.enable_model_cpu_offload()</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-c3yozi">To speed up your inference code even more, use <a href="../optimization/torch2.0#torchcompile"><code>torch.compile</code></a>. You should wrap <code>torch.compile</code> around the most intensive component in the pipeline, which is typically the UNet:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline.unet = torch.<span class="hljs-built_in">compile</span>(pipeline.unet, mode=<span class="hljs-string">"reduce-overhead"</span>, fullgraph=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ve8eie">Learn more in the <a href="../optimization/memory">Reduce memory usage</a> and <a href="../optimization/torch2.0">Torch 2.0</a> guides.</p> <a 
class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/inpaint.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_zvhs44 = { | |
| assets: "/docs/diffusers/pr_10312/en", | |
| base: "/docs/diffusers/pr_10312/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/pr_10312/en/_app/immutable/entry/start.203b6290.js"), | |
| import("/docs/diffusers/pr_10312/en/_app/immutable/entry/app.423ea23f.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 246], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 119 kB
- Xet hash:
- 57c341182f8a6560adde0046a1bca6b8f9ce6385d4dcb6b443fb3d47435a8a0d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.