Buckets:

hf-doc-build/doc-dev / diffusers /main /en /quicktour.html
download
raw
60.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quicktour&quot;,&quot;local&quot;:&quot;quicktour&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DiffusionPipeline&quot;,&quot;local&quot;:&quot;diffusionpipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Local pipeline&quot;,&quot;local&quot;:&quot;local-pipeline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Swapping schedulers&quot;,&quot;local&quot;:&quot;swapping-schedulers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Models&quot;,&quot;local&quot;:&quot;models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Schedulers&quot;,&quot;local&quot;:&quot;schedulers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/diffusers/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/start.21e27d66.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/scheduler.8c3d61f6.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/singletons.1db06f6d.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.0997d446.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/paths.085e8bc8.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/entry/app.de4fb612.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/index.da70eac4.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/0.f6117ae5.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/nodes/179.d0cd46d3.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/Tip.1d9b8c37.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/CodeBlock.00a903b3.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/DocNotebookDropdown.02900f6b.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/diffusers/main/en/_app/immutable/chunks/EditOnGithub.1e64e623.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quicktour&quot;,&quot;local&quot;:&quot;quicktour&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DiffusionPipeline&quot;,&quot;local&quot;:&quot;diffusionpipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Local pipeline&quot;,&quot;local&quot;:&quot;local-pipeline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Swapping schedulers&quot;,&quot;local&quot;:&quot;swapping-schedulers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Models&quot;,&quot;local&quot;:&quot;models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Schedulers&quot;,&quot;local&quot;:&quot;schedulers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="quicktour" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quicktour"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quicktour</span></h1> <p data-svelte-h="svelte-1cpm7jx">Diffusion models are trained to denoise random Gaussian noise step-by-step to generate a sample of interest, such as an image or audio. This has sparked a tremendous amount of interest in generative AI, and you have probably seen examples of diffusion generated images on the internet. 🧨 Diffusers is a library aimed at making diffusion models widely accessible to everyone.</p> <p data-svelte-h="svelte-3g0auf">Whether you’re a developer or an everyday user, this quicktour will introduce you to 🧨 Diffusers and help you get up and generating quickly! There are three main components of the library to know about:</p> <ul data-svelte-h="svelte-1qchzzn"><li>The <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> is a high-level end-to-end class designed to rapidly generate samples from pretrained diffusion models for inference.</li> <li>Popular pretrained <a href="./api/models">model</a> architectures and modules that can be used as building blocks for creating diffusion systems.</li> <li>Many different <a href="./api/schedulers/overview">schedulers</a> - algorithms that control how noise is added for training, and how to generate denoised images during inference.</li></ul> <p data-svelte-h="svelte-7kdkxv">The quicktour will show you how to use the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> for inference, and then walk you through how to combine a model and scheduler to replicate what’s happening inside the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-10po4bc">The quicktour is a simplified version of the introductory 🧨 Diffusers <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/diffusers_intro.ipynb" rel="nofollow">notebook</a> to help you get started quickly. If you want to learn more about 🧨 Diffusers’ goal, design philosophy, and additional details about its core API, check out the notebook!</p></div> <p data-svelte-h="svelte-1c9nexd">Before you begin, make sure you have all the necessary libraries installed:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># uncomment to install the necessary libraries in Colab</span>
<span class="hljs-comment">#!pip install --upgrade diffusers accelerate transformers</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-1iag7i0"><li><a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">🤗 Accelerate</a> speeds up model loading for inference and training.</li> <li><a href="https://huggingface.co/docs/transformers/index" rel="nofollow">🤗 Transformers</a> is required to run the most popular diffusion models, such as <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/overview" rel="nofollow">Stable Diffusion</a>.</li></ul> <h2 class="relative group"><a id="diffusionpipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#diffusionpipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DiffusionPipeline</span></h2> <p data-svelte-h="svelte-42w8gp">The <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> is the easiest way to use a pretrained diffusion system for inference. It is an end-to-end system containing the model and the scheduler. You can use the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> out-of-the-box for many tasks. Take a look at the table below for some supported tasks, and for a complete list of supported tasks, check out the <a href="./api/pipelines/overview#diffusers-summary">🧨 Diffusers Summary</a> table.</p> <table data-svelte-h="svelte-1jwscsx"><thead><tr><th><strong>Task</strong></th> <th><strong>Description</strong></th> <th><strong>Pipeline</strong></th></tr></thead> <tbody><tr><td>Unconditional Image Generation</td> <td>generate an image from Gaussian noise</td> <td><a href="./using-diffusers/unconditional_image_generation">unconditional_image_generation</a></td></tr> <tr><td>Text-Guided Image Generation</td> <td>generate an image given a text prompt</td> <td><a href="./using-diffusers/conditional_image_generation">conditional_image_generation</a></td></tr> <tr><td>Text-Guided Image-to-Image Translation</td> <td>adapt an image guided by a text prompt</td> <td><a href="./using-diffusers/img2img">img2img</a></td></tr> <tr><td>Text-Guided Image-Inpainting</td> <td>fill the masked part of an image given the image, the mask and a text prompt</td> <td><a href="./using-diffusers/inpaint">inpaint</a></td></tr> <tr><td>Text-Guided Depth-to-Image Translation</td> <td>adapt parts of an image guided by a text prompt while preserving structure via depth estimation</td> <td><a href="./using-diffusers/depth2img">depth2img</a></td></tr></tbody></table> <p data-svelte-h="svelte-14ivsj7">Start by creating an instance of a <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> and specify which pipeline checkpoint you would like to download.
You can use the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> for any <a href="https://huggingface.co/models?library=diffusers&sort=downloads" rel="nofollow">checkpoint</a> stored on the Hugging Face Hub.
In this quicktour, you’ll load the <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>stable-diffusion-v1-5</code></a> checkpoint for text-to-image generation.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1rkwh7b">For <a href="https://huggingface.co/CompVis/stable-diffusion" rel="nofollow">Stable Diffusion</a> models, please carefully read the <a href="https://huggingface.co/spaces/CompVis/stable-diffusion-license" rel="nofollow">license</a> first before running the model. 🧨 Diffusers implements a <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py" rel="nofollow"><code>safety_checker</code></a> to prevent offensive or harmful content, but the model’s improved image generation capabilities can still produce potentially harmful content.</p></div> <p data-svelte-h="svelte-1sqaj7s">Load the model with the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline.from_pretrained">from_pretrained()</a> method:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>pipeline = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>, use_safetensors=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rhvpsx">The <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> downloads and caches all modeling, tokenization, and scheduling components. You’ll see that the Stable Diffusion pipeline is composed of the <a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a> and <a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a> among other things:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>pipeline
StableDiffusionPipeline {
<span class="hljs-string">&quot;_class_name&quot;</span>: <span class="hljs-string">&quot;StableDiffusionPipeline&quot;</span>,
<span class="hljs-string">&quot;_diffusers_version&quot;</span>: <span class="hljs-string">&quot;0.21.4&quot;</span>,
...,
<span class="hljs-string">&quot;scheduler&quot;</span>: [
<span class="hljs-string">&quot;diffusers&quot;</span>,
<span class="hljs-string">&quot;PNDMScheduler&quot;</span>
],
...,
<span class="hljs-string">&quot;unet&quot;</span>: [
<span class="hljs-string">&quot;diffusers&quot;</span>,
<span class="hljs-string">&quot;UNet2DConditionModel&quot;</span>
],
<span class="hljs-string">&quot;vae&quot;</span>: [
<span class="hljs-string">&quot;diffusers&quot;</span>,
<span class="hljs-string">&quot;AutoencoderKL&quot;</span>
]
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vgclnr">We strongly recommend running the pipeline on a GPU because the model consists of roughly 1.4 billion parameters.
You can move the generator object to a GPU, just like you would in PyTorch:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>pipeline.to(<span class="hljs-string">&quot;cuda&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1p83go0">Now you can pass a text prompt to the <code>pipeline</code> to generate an image, and then access the denoised image. By default, the image output is wrapped in a <a href="https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class" rel="nofollow"><code>PIL.Image</code></a> object.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>image = pipeline(<span class="hljs-string">&quot;An image of a squirrel in Picasso style&quot;</span>).images[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-6cjtp1"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/image_of_squirrel_painting.png"></div> <p data-svelte-h="svelte-10baso4">Save the image by calling <code>save</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>image.save(<span class="hljs-string">&quot;image_of_squirrel_painting.png&quot;</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="local-pipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#local-pipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Local pipeline</span></h3> <p data-svelte-h="svelte-1hfw8pu">You can also use the pipeline locally. The only difference is you need to download the weights first:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!git lfs install
!git <span class="hljs-built_in">clone</span> https://huggingface.co/runwayml/stable-diffusion-v1-5<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fgahuv">Then load the saved weights into the pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>pipeline = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;./stable-diffusion-v1-5&quot;</span>, use_safetensors=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1c18le1">Now, you can run the pipeline as you would in the section above.</p> <h3 class="relative group"><a id="swapping-schedulers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#swapping-schedulers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Swapping schedulers</span></h3> <p data-svelte-h="svelte-ha3iyl">Different schedulers come with different denoising speeds and quality trade-offs. The best way to find out which one works best for you is to try them out! One of the main features of 🧨 Diffusers is to allow you to easily switch between schedulers. For example, to replace the default <a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a> with the <a href="/docs/diffusers/main/en/api/schedulers/euler#diffusers.EulerDiscreteScheduler">EulerDiscreteScheduler</a>, load it with the <a href="/docs/diffusers/main/en/api/configuration#diffusers.ConfigMixin.from_config">from_config()</a> method:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> EulerDiscreteScheduler
<span class="hljs-meta">&gt;&gt;&gt; </span>pipeline = DiffusionPipeline.from_pretrained(<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>, use_safetensors=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hnrydi">Try generating an image with the new scheduler and see if you notice a difference!</p> <p data-svelte-h="svelte-1wktwe">In the next section, you’ll take a closer look at the components - the model and scheduler - that make up the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> and learn how to use these components to generate an image of a cat.</p> <h2 class="relative group"><a id="models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Models</span></h2> <p data-svelte-h="svelte-tok5h3">Most models take a noisy sample, and at each timestep it predicts the <em>noise residual</em> (other models learn to predict the previous sample directly or the velocity or <a href="https://github.com/huggingface/diffusers/blob/5e5ce13e2f89ac45a0066cb3f369462a3cf1d9ef/src/diffusers/schedulers/scheduling_ddim.py#L110" rel="nofollow"><code>v-prediction</code></a>), the difference between a less noisy image and the input image. You can mix and match models to create other diffusion systems.</p> <p data-svelte-h="svelte-1ozhfyi">Models are initiated with the <a href="/docs/diffusers/main/en/api/models/overview#diffusers.ModelMixin.from_pretrained">from_pretrained()</a> method which also locally caches the model weights so it is faster the next time you load the model. For the quicktour, you’ll load the <a href="/docs/diffusers/main/en/api/models/unet2d#diffusers.UNet2DModel">UNet2DModel</a>, a basic unconditional image generation model with a checkpoint trained on cat images:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> UNet2DModel
<span class="hljs-meta">&gt;&gt;&gt; </span>repo_id = <span class="hljs-string">&quot;google/ddpm-cat-256&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = UNet2DModel.from_pretrained(repo_id, use_safetensors=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13d7r4d">To access the model parameters, call <code>model.config</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>model.config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-yz2ttj">The model configuration is a 🧊 frozen 🧊 dictionary, which means those parameters can’t be changed after the model is created. This is intentional and ensures that the parameters used to define the model architecture at the start remain the same, while other parameters can still be adjusted during inference.</p> <p data-svelte-h="svelte-1xmzk7t">Some of the most important parameters are:</p> <ul data-svelte-h="svelte-1t4snl"><li><code>sample_size</code>: the height and width dimension of the input sample.</li> <li><code>in_channels</code>: the number of input channels of the input sample.</li> <li><code>down_block_types</code> and <code>up_block_types</code>: the type of down- and upsampling blocks used to create the UNet architecture.</li> <li><code>block_out_channels</code>: the number of output channels of the downsampling blocks; also used in reverse order for the number of input channels of the upsampling blocks.</li> <li><code>layers_per_block</code>: the number of ResNet blocks present in each UNet block.</li></ul> <p data-svelte-h="svelte-18dlapv">To use the model for inference, create the image shape with random Gaussian noise. It should have a <code>batch</code> axis because the model can receive multiple random noises, a <code>channel</code> axis corresponding to the number of input channels, and a <code>sample_size</code> axis for the height and width of the image:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>torch.manual_seed(<span class="hljs-number">0</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>noisy_sample = torch.randn(<span class="hljs-number">1</span>, model.config.in_channels, model.config.sample_size, model.config.sample_size)
<span class="hljs-meta">&gt;&gt;&gt; </span>noisy_sample.shape
torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">3</span>, <span class="hljs-number">256</span>, <span class="hljs-number">256</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-u37s0p">For inference, pass the noisy image and a <code>timestep</code> to the model. The <code>timestep</code> indicates how noisy the input image is, with more noise at the beginning and less at the end. This helps the model determine its position in the diffusion process, whether it is closer to the start or the end. Use the <code>sample</code> method to get the model output:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> noisy_residual = model(sample=noisy_sample, timestep=<span class="hljs-number">2</span>).sample<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1c3hoxe">To generate actual examples though, you’ll need a scheduler to guide the denoising process. In the next section, you’ll learn how to couple a model with a scheduler.</p> <h2 class="relative group"><a id="schedulers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#schedulers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Schedulers</span></h2> <p data-svelte-h="svelte-vj25k8">Schedulers manage going from a noisy sample to a less noisy sample given the model output - in this case, it is the <code>noisy_residual</code>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1d30mol">🧨 Diffusers is a toolbox for building diffusion systems. While the <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> is a convenient way to get started with a pre-built diffusion system, you can also choose your own model and scheduler components separately to build a custom diffusion system.</p></div> <p data-svelte-h="svelte-xhmct">For the quicktour, you’ll instantiate the <a href="/docs/diffusers/main/en/api/schedulers/ddpm#diffusers.DDPMScheduler">DDPMScheduler</a> with its <a href="/docs/diffusers/main/en/api/configuration#diffusers.ConfigMixin.from_config">from_config()</a> method:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DDPMScheduler
<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler = DDPMScheduler.from_pretrained(repo_id)
<span class="hljs-meta">&gt;&gt;&gt; </span>scheduler
DDPMScheduler {
<span class="hljs-string">&quot;_class_name&quot;</span>: <span class="hljs-string">&quot;DDPMScheduler&quot;</span>,
<span class="hljs-string">&quot;_diffusers_version&quot;</span>: <span class="hljs-string">&quot;0.21.4&quot;</span>,
<span class="hljs-string">&quot;beta_end&quot;</span>: <span class="hljs-number">0.02</span>,
<span class="hljs-string">&quot;beta_schedule&quot;</span>: <span class="hljs-string">&quot;linear&quot;</span>,
<span class="hljs-string">&quot;beta_start&quot;</span>: <span class="hljs-number">0.0001</span>,
<span class="hljs-string">&quot;clip_sample&quot;</span>: true,
<span class="hljs-string">&quot;clip_sample_range&quot;</span>: <span class="hljs-number">1.0</span>,
<span class="hljs-string">&quot;dynamic_thresholding_ratio&quot;</span>: <span class="hljs-number">0.995</span>,
<span class="hljs-string">&quot;num_train_timesteps&quot;</span>: <span class="hljs-number">1000</span>,
<span class="hljs-string">&quot;prediction_type&quot;</span>: <span class="hljs-string">&quot;epsilon&quot;</span>,
<span class="hljs-string">&quot;sample_max_value&quot;</span>: <span class="hljs-number">1.0</span>,
<span class="hljs-string">&quot;steps_offset&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&quot;thresholding&quot;</span>: false,
<span class="hljs-string">&quot;timestep_spacing&quot;</span>: <span class="hljs-string">&quot;leading&quot;</span>,
<span class="hljs-string">&quot;trained_betas&quot;</span>: null,
<span class="hljs-string">&quot;variance_type&quot;</span>: <span class="hljs-string">&quot;fixed_small&quot;</span>
}<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-12uexhh">💡 Unlike a model, a scheduler does not have trainable weights and is parameter-free!</p></div> <p data-svelte-h="svelte-1xmzk7t">Some of the most important parameters are:</p> <ul data-svelte-h="svelte-7zrky5"><li><code>num_train_timesteps</code>: the length of the denoising process or, in other words, the number of timesteps required to process random Gaussian noise into a data sample.</li> <li><code>beta_schedule</code>: the type of noise schedule to use for inference and training.</li> <li><code>beta_start</code> and <code>beta_end</code>: the start and end noise values for the noise schedule.</li></ul> <p data-svelte-h="svelte-iouzdr">To predict a slightly less noisy image, pass the following to the scheduler’s <a href="/docs/diffusers/main/en/api/schedulers/ddpm#diffusers.DDPMScheduler.step">step()</a> method: model output, <code>timestep</code>, and current <code>sample</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>less_noisy_sample = scheduler.step(model_output=noisy_residual, timestep=<span class="hljs-number">2</span>, sample=noisy_sample).prev_sample
<span class="hljs-meta">&gt;&gt;&gt; </span>less_noisy_sample.shape
torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">3</span>, <span class="hljs-number">256</span>, <span class="hljs-number">256</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3x4azv">The <code>less_noisy_sample</code> can be passed to the next <code>timestep</code> where it’ll get even less noisy! Let’s bring it all together now and visualize the entire denoising process.</p> <p data-svelte-h="svelte-1hb61xc">First, create a function that postprocesses and displays the denoised image as a <code>PIL.Image</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> PIL.Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">display_sample</span>(<span class="hljs-params">sample, i</span>):
<span class="hljs-meta">... </span> image_processed = sample.cpu().permute(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">1</span>)
<span class="hljs-meta">... </span> image_processed = (image_processed + <span class="hljs-number">1.0</span>) * <span class="hljs-number">127.5</span>
<span class="hljs-meta">... </span> image_processed = image_processed.numpy().astype(np.uint8)
<span class="hljs-meta">... </span> image_pil = PIL.Image.fromarray(image_processed[<span class="hljs-number">0</span>])
<span class="hljs-meta">... </span> display(<span class="hljs-string">f&quot;Image at step <span class="hljs-subst">{i}</span>&quot;</span>)
<span class="hljs-meta">... </span> display(image_pil)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12lvw8p">To speed up the denoising process, move the input and model to a GPU:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>model.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>noisy_sample = noisy_sample.to(<span class="hljs-string">&quot;cuda&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vtqbzo">Now create a denoising loop that predicts the residual of the less noisy sample, and computes the less noisy sample with the scheduler:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> tqdm
<span class="hljs-meta">&gt;&gt;&gt; </span>sample = noisy_sample
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> i, t <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(tqdm.tqdm(scheduler.timesteps)):
<span class="hljs-meta">... </span> <span class="hljs-comment"># 1. predict noise residual</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> residual = model(sample, t).sample
<span class="hljs-meta">... </span> <span class="hljs-comment"># 2. compute less noisy image and set x_t -&gt; x_t-1</span>
<span class="hljs-meta">... </span> sample = scheduler.step(residual, t, sample).prev_sample
<span class="hljs-meta">... </span> <span class="hljs-comment"># 3. optionally look at image</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> (i + <span class="hljs-number">1</span>) % <span class="hljs-number">50</span> == <span class="hljs-number">0</span>:
<span class="hljs-meta">... </span> display_sample(sample, i + <span class="hljs-number">1</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ho6yui">Sit back and watch as a cat is generated from nothing but noise! 😻</p> <div class="flex justify-center" data-svelte-h="svelte-1yzsrw6"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/diffusion-quicktour.png"></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next steps</span></h2> <p data-svelte-h="svelte-1qc5p28">Hopefully, you generated some cool images with 🧨 Diffusers in this quicktour! For your next steps, you can:</p> <ul data-svelte-h="svelte-32wm8b"><li>Train or finetune a model to generate your own images in the <a href="./tutorials/basic_training">training</a> tutorial.</li> <li>See example official and community <a href="https://github.com/huggingface/diffusers/tree/main/examples#-diffusers-examples" rel="nofollow">training or finetuning scripts</a> for a variety of use cases.</li> <li>Learn more about loading, accessing, changing, and comparing schedulers in the <a href="./using-diffusers/schedulers">Using different Schedulers</a> guide.</li> <li>Explore prompt engineering, speed and memory optimizations, and tips and tricks for generating higher-quality images with the <a href="./stable_diffusion">Stable Diffusion</a> guide.</li> <li>Dive deeper into speeding up 🧨 Diffusers with guides on <a href="./optimization/fp16">optimized PyTorch on a GPU</a>, and inference guides for running <a href="./optimization/mps">Stable Diffusion on Apple Silicon (M1/M2)</a> and <a href="./optimization/onnx">ONNX Runtime</a>.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/quicktour.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1eu7tzz = {
assets: "/docs/diffusers/main/en",
base: "/docs/diffusers/main/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/diffusers/main/en/_app/immutable/entry/start.21e27d66.js"),
import("/docs/diffusers/main/en/_app/immutable/entry/app.de4fb612.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 179],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
60.9 kB
·
Xet hash:
6413d38c56b0b8f20ca23b8cf07730ef8fa9618c9edf19e47a401f774b09b397

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.