Buckets:

download
raw
27.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Unit 2: Fine-Tuning, Guidance and Conditioning&quot;,&quot;local&quot;:&quot;unit-2-fine-tuning-guidance-and-conditioning&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Start this Unit 🚀&quot;,&quot;local&quot;:&quot;start-this-unit-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Fine-Tuning&quot;,&quot;local&quot;:&quot;fine-tuning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Guidance&quot;,&quot;local&quot;:&quot;guidance&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conditioning&quot;,&quot;local&quot;:&quot;conditioning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Hands-On Notebook&quot;,&quot;local&quot;:&quot;hands-on-notebook&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Project Time&quot;,&quot;local&quot;:&quot;project-time&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Some Additional Resources&quot;,&quot;local&quot;:&quot;some-additional-resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<!-- Fix: rel was "modulepreload", which is a preload hint valid only for JavaScript
     modules — on a .css asset it is invalid and the stylesheet is never applied.
     rel="stylesheet" loads and applies the CSS (type attr omitted; text/css is the default). -->
<link href="/docs/diffusion-course/pr_113/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<!-- SvelteKit module-preload hints: prefetch the JS chunks that the inline
     bootstrap script at the bottom of the page dynamically imports (entry/start,
     entry/app) plus their shared chunk dependencies, so hydration starts without
     a waterfall of sequential module fetches. -->
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/entry/start.d783b3e7.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/scheduler.47c1f99a.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/singletons.8d8f1267.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/paths.82d718a6.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/entry/app.21133b1e.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/preload-helper.d7d11f96.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/index.bcb71b6c.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/nodes/0.c4a51760.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/nodes/9.f36532a3.js">
<link rel="modulepreload" href="/docs/diffusion-course/pr_113/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.2aa9fd83.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Unit 2: Fine-Tuning, Guidance and Conditioning&quot;,&quot;local&quot;:&quot;unit-2-fine-tuning-guidance-and-conditioning&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Start this Unit 🚀&quot;,&quot;local&quot;:&quot;start-this-unit-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Fine-Tuning&quot;,&quot;local&quot;:&quot;fine-tuning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Guidance&quot;,&quot;local&quot;:&quot;guidance&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conditioning&quot;,&quot;local&quot;:&quot;conditioning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Hands-On Notebook&quot;,&quot;local&quot;:&quot;hands-on-notebook&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Project Time&quot;,&quot;local&quot;:&quot;project-time&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Some Additional Resources&quot;,&quot;local&quot;:&quot;some-additional-resources&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex 
items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="unit-2-fine-tuning-guidance-and-conditioning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unit-2-fine-tuning-guidance-and-conditioning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 
28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Unit 2: Fine-Tuning, Guidance and Conditioning</span></h1> <p data-svelte-h="svelte-u6zj26">Welcome to Unit 2 of the Hugging Face Diffusion Models Course! In this unit, you will learn how to use and adapt pre-trained diffusion models in new ways. You will also see how we can create diffusion models that take additional inputs as <strong>conditioning</strong> to control the generation process.</p> <h2 class="relative group"><a id="start-this-unit-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#start-this-unit-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Start this Unit 🚀</span></h2> <p data-svelte-h="svelte-1hcxnu9">Here are the steps for this unit:</p> <ul data-svelte-h="svelte-p1794t"><li>Make sure you’ve <a 
href="https://huggingface.us17.list-manage.com/subscribe?u=7f57e683fa28b51bfc493d048&id=ef963b4162" rel="nofollow">signed up for this course</a> so that you can be notified when new material is released.</li> <li>Read through the material below for an overview of the key ideas of this unit.</li> <li>Check out the <em><strong>Fine-tuning and Guidance</strong></em> notebook to fine-tune an existing diffusion model on a new dataset using the 🤗 Diffusers library and to modify the sampling procedure using guidance.</li> <li>Follow the example in the notebook to share a Gradio demo for your custom model.</li> <li>(Optional) Check out the <em><strong>Class-conditioned Diffusion Model Example</strong></em> notebook to see how we can add additional control to the generation process.</li> <li>(Optional) Check out <a href="https://www.youtube.com/watch?v=mY20iKOQ2zw" rel="nofollow">this video</a> for an informal run-through of the material in this unit.</li></ul> <p data-svelte-h="svelte-vvpwne">📢 Don’t forget to join the <a href="https://huggingface.co/join/discord" rel="nofollow">Discord</a>, where you can discuss the material and share what you’ve made in the <code>#diffusion-models-class</code> channel.</p> <h2 class="relative group"><a id="fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 
56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fine-Tuning</span></h2> <p data-svelte-h="svelte-1p4gi0x">As you may have seen in Unit 1, training diffusion models from scratch can be time-consuming! Especially as we push to higher resolutions, the time and data required to train a model from scratch can become impractical. Fortunately, there is a solution: begin with a model that has already been trained! This way we start from a model that has already learned to denoise images of some kind, and the hope is that this provides a better starting point than beginning from a randomly initialized model.</p> <p data-svelte-h="svelte-22pnmc"><img src="https://api.wandb.ai/files/johnowhitaker/dm_finetune/2upaa341/media/images/Sample%20generations_501_d980e7fe082aec0dfc49.png" alt="Example images generated with a model trained on LSUN Bedrooms and fine-tuned for 500 steps on WikiArt"></p> <p data-svelte-h="svelte-16dlqmy">Fine-tuning typically works best if the new data somewhat resembles the base model’s original training data (for example, beginning with a model trained on faces is probably a good idea if you’re trying to generate cartoon faces) but surprisingly the benefits persist even if the domain is changed quite drastically. The image above is generated from a <a href="https://huggingface.co/google/ddpm-bedroom-256" rel="nofollow">model trained on the LSUN Bedrooms dataset</a> and fine-tuned for 500 steps on <a href="https://huggingface.co/datasets/huggan/wikiart" rel="nofollow">the WikiArt dataset</a>. 
The <a href="https://github.com/huggingface/diffusion-models-class/blob/main/unit2/finetune_model.py" rel="nofollow">training script</a> is included for reference alongside the notebooks for this unit.</p> <h2 class="relative group"><a id="guidance" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#guidance"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Guidance</span></h2> <p data-svelte-h="svelte-cx8sou">Unconditional models don’t give much control over what is generated. We can train a conditional model (more on that in the next section) that takes additional inputs to help steer the generation process, but what if we already have a trained unconditional model we’d like to use? 
Enter guidance, a process by which the model predictions at each step in the generation process are evaluated against some guidance function and modified such that the final generated image is more to our liking.</p> <p data-svelte-h="svelte-1qxkv6z"><img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/diffusion-course/guidance_eg.png" alt="guidance example image"></p> <p data-svelte-h="svelte-1pfro1l">This guidance function can be almost anything, making this a powerful technique! In the notebook, we build up from a simple example (controlling the color, as illustrated in the example output above) to one utilizing a powerful pre-trained model called CLIP which lets us guide generation based on a text description.</p> <h2 class="relative group"><a id="conditioning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conditioning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conditioning</span></h2> <p data-svelte-h="svelte-1kfy227">Guidance is a great way to get some additional mileage from an unconditional diffusion model, but if we have additional information (such as a class label 
or an image caption) available during training then we can also feed this to the model for it to use as it makes its predictions. In doing so, we create a <strong>conditional</strong> model, which we can control at inference time by controlling what is fed in as conditioning. The notebook shows an example of a class-conditioned model which learns to generate images according to a class label.</p> <p data-svelte-h="svelte-ihwxtb"><img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/diffusion-course/conditional_digit_generation.png" alt="conditioning example"></p> <p data-svelte-h="svelte-itjnhm">There are a number of ways to pass in this conditioning information, such as</p> <ul data-svelte-h="svelte-ao69km"><li>Feeding it in as additional channels in the input to the UNet. This is often used when the conditioning information is the same shape as the image, such as a segmentation mask, a depth map or a blurry version of the image (in the case of a restoration/superresolution model). It does work for other types of conditioning too. For example, in the notebook, the class label is mapped to an embedding and then expanded to be the same width and height as the input image so that it can be fed in as additional channels.</li> <li>Creating an embedding and then projecting it down to a size that matches the number of channels at the output of one or more internal layers of the UNet, and then adding it to those outputs. This is how the timestep conditioning is handled, for example. The output of each Resnet block has a projected timestep embedding added to it. This is useful when you have a vector such as a CLIP image embedding as your conditioning information. 
A notable example is the <a href="https://huggingface.co/spaces/lambdalabs/stable-diffusion-image-variations" rel="nofollow">‘Image Variations’ version of Stable Diffusion</a> which does exactly this.</li> <li>Adding cross-attention layers that can ‘attend’ to a sequence passed in as conditioning. This is most useful when the conditioning is in the form of some text - the text is mapped to a sequence of embeddings using a transformer model, and then cross-attention layers in the UNet are used to incorporate this information into the denoising path. We’ll see this in action in Unit 3 as we examine how Stable Diffusion handles text conditioning.</li></ul> <h2 class="relative group"><a id="hands-on-notebook" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#hands-on-notebook"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Hands-On Notebook</span></h2> <table data-svelte-h="svelte-e3lgjf"><thead><tr><th align="left">Chapter</th> <th align="left">Colab</th> <th align="left">Kaggle</th> <th align="left">Gradient</th> <th align="left">Studio Lab</th></tr></thead> <tbody><tr><td align="left">Fine-tuning and Guidance</td> <td 
align="left"><a href="https://colab.research.google.com/github/huggingface/diffusion-models-class/blob/main/unit2/01_finetuning_and_guidance.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></td> <td align="left"><a href="https://kaggle.com/kernels/welcome?src=https://github.com/huggingface/diffusion-models-class/blob/main/unit2/01_finetuning_and_guidance.ipynb" rel="nofollow"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Kaggle"></a></td> <td align="left"><a href="https://console.paperspace.com/github/huggingface/diffusion-models-class/blob/main/unit2/01_finetuning_and_guidance.ipynb" rel="nofollow"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Gradient"></a></td> <td align="left"><a href="https://studiolab.sagemaker.aws/import/github/huggingface/diffusion-models-class/blob/main/unit2/01_finetuning_and_guidance.ipynb" rel="nofollow"><img src="https://studiolab.sagemaker.aws/studiolab.svg" alt="Open In SageMaker Studio Lab"></a></td></tr> <tr><td align="left">Class-conditioned Diffusion Model Example</td> <td align="left"><a href="https://colab.research.google.com/github/huggingface/diffusion-models-class/blob/main/unit2/02_class_conditioned_diffusion_model_example.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></td> <td align="left"><a href="https://kaggle.com/kernels/welcome?src=https://github.com/huggingface/diffusion-models-class/blob/main/unit2/02_class_conditioned_diffusion_model_example.ipynb" rel="nofollow"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Kaggle"></a></td> <td align="left"><a href="https://console.paperspace.com/github/huggingface/diffusion-models-class/blob/main/unit2/02_class_conditioned_diffusion_model_example.ipynb" rel="nofollow"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Gradient"></a></td> <td align="left"><a 
href="https://studiolab.sagemaker.aws/import/github/huggingface/diffusion-models-class/blob/main/unit2/02_class_conditioned_diffusion_model_example.ipynb" rel="nofollow"><img src="https://studiolab.sagemaker.aws/studiolab.svg" alt="Open In SageMaker Studio Lab"></a></td></tr></tbody></table> <p data-svelte-h="svelte-6nzchf">At this point, you know enough to get started with the accompanying notebooks! Open them in your platform of choice using the links above. Fine-tuning is quite computationally intensive, so if you’re using Kaggle or Google Colab make sure you set the runtime type to ‘GPU’ for the best results.</p> <p data-svelte-h="svelte-fjxr55">The bulk of the material is in <em><strong>Fine-tuning and Guidance</strong></em>, where we explore these two topics through worked examples. The notebook shows how you can fine-tune an existing model on new data, add guidance, and share the result as a Gradio demo. There is an accompanying script (<a href="https://github.com/huggingface/diffusion-models-class/blob/main/unit2/finetune_model.py" rel="nofollow">finetune_model.py</a>) that makes it easy to experiment with different fine-tuning settings, and an <a href="https://huggingface.co/spaces/johnowhitaker/color-guided-wikiart-diffusion" rel="nofollow">example space</a> that you can use as a template for sharing your own demo on 🤗 Spaces.</p> <p data-svelte-h="svelte-ys2bcy">In the <em><strong>Class-conditioned Diffusion Model Example</strong></em>, we show a brief worked example of creating a diffusion model conditioned on class labels using the MNIST dataset. 
The focus is on demonstrating the core idea as simply as possible: by giving the model extra information about what it is supposed to be denoising, we can later control what kinds of images are generated at inference time.</p> <h2 class="relative group"><a id="project-time" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#project-time"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Project Time</span></h2> <p data-svelte-h="svelte-1jxuy2z">Following the examples in the <em><strong>Fine-tuning and Guidance</strong></em> notebook, fine-tune your own model or pick an existing model and create a Gradio demo to showcase your new guidance skills. 
Don’t forget to share your demo on Discord, Twitter etc so we can admire your work!</p> <h2 class="relative group"><a id="some-additional-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#some-additional-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Some Additional Resources</span></h2> <p data-svelte-h="svelte-natgq1"><a href="https://arxiv.org/abs/2010.02502" rel="nofollow">Denoising Diffusion Implicit Models</a> - Introduced the DDIM sampling method (used by DDIMScheduler)</p> <p data-svelte-h="svelte-132h07s"><a href="https://arxiv.org/abs/2112.10741" rel="nofollow">GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> - Introduced methods for conditioning diffusion models on text</p> <p data-svelte-h="svelte-rnfmiz"><a href="https://arxiv.org/abs/2211.01324" rel="nofollow">eDiffi: Text-to-Image Diffusion Models with an Ensemble of Expert Denoisers</a> - Shows how many different kinds of conditioning can be used together to give even more control over the kinds of samples generated</p> <p data-svelte-h="svelte-1wvf2ag">Found more great resources? 
Let us know and we’ll add them to this list.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusion-models-class/blob/main/unit2/README.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
// SvelteKit client bootstrap (framework-generated). Publishes the app's path
// configuration as a global, then dynamically imports the client runtime and
// starts it against this script's parent element.
{
// Intentional implicit global (no let/const): the name embeds the app's version
// hash and is read by the imported runtime modules — do not rename or scope it.
__sveltekit_x2iol1 = {
assets: "/docs/diffusion-course/pr_113/en",
base: "/docs/diffusion-course/pr_113/en",
env: {}
};
// document.currentScript is only non-null while this inline script executes
// synchronously; its parent is the container SvelteKit takes over (presumably
// the hydration root — confirm against the SvelteKit version in use).
const element = document.currentScript.parentElement;
// Serialized route data passed to kit.start; both node entries are null here
// (no server load() data for this docs page).
const data = [null,null];
// Fetch the runtime entry points in parallel; both URLs are also modulepreload
// hints in <head>, so these imports should hit the preload cache.
Promise.all([
import("/docs/diffusion-course/pr_113/en/_app/immutable/entry/start.d783b3e7.js"),
import("/docs/diffusion-course/pr_113/en/_app/immutable/entry/app.21133b1e.js")
]).then(([kit, app]) => {
kit.start(app, element, {
// Indexes into the app's node manifest — likely root layout (0) and this
// page's component (9, matching nodes/9.*.js preloaded above).
node_ids: [0, 9],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
27.9 kB
·
Xet hash:
a09a874d9f1ec9c011239d15f6c8b3110fe770157581503a3588897c400b998d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.