Buckets:
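| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Prompt techniques&quot;,&quot;local&quot;:&quot;prompt-techniques&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Prompt engineering&quot;,&quot;local&quot;:&quot;prompt-engineering&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Prompt enhancing with GPT2&quot;,&quot;local&quot;:&quot;prompt-enhancing-with-gpt2&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Prompt weighting&quot;,&quot;local&quot;:&quot;prompt-weighting&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Weighting&quot;,&quot;local&quot;:&quot;weighting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Blending&quot;,&quot;local&quot;:&quot;blending&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Conjunction&quot;,&quot;local&quot;:&quot;conjunction&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Textual inversion&quot;,&quot;local&quot;:&quot;textual-inversion&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;DreamBooth&quot;,&quot;local&quot;:&quot;dreambooth&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Stable Diffusion XL&quot;,&quot;local&quot;:&quot;stable-diffusion-xl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |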
| <link href="/docs/diffusers/pr_10567/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/entry/start.5ab964f0.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/scheduler.8c3d61f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/singletons.1271a703.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/index.0997d446.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/paths.af967ee5.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/entry/app.d83dbfce.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/index.da70eac4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/nodes/0.bb4a0671.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/nodes/269.9305a9e2.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/Tip.1d9b8c37.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/CodeBlock.00a903b3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/DocNotebookDropdown.02900f6b.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/EditOnGithub.1e64e623.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Prompt techniques","local":"prompt-techniques","sections":[{"title":"Prompt engineering","local":"prompt-engineering","sections":[],"depth":2},{"title":"Prompt enhancing with GPT2","local":"prompt-enhancing-with-gpt2","sections":[],"depth":2},{"title":"Prompt weighting","local":"prompt-weighting","sections":[{"title":"Weighting","local":"weighting","sections":[],"depth":3},{"title":"Blending","local":"blending","sections":[],"depth":3},{"title":"Conjunction","local":"conjunction","sections":[],"depth":3},{"title":"Textual inversion","local":"textual-inversion","sections":[],"depth":3},{"title":"DreamBooth","local":"dreambooth","sections":[],"depth":3},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="prompt-techniques" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-techniques"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Prompt techniques</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-1kbqxv6">Prompts are important because they describe what you want a diffusion model to generate. The best prompts are detailed, specific, and well-structured to help the model realize your vision. But crafting a great prompt takes time and effort and sometimes it may not be enough because language and words can be imprecise. This is where you need to boost your prompt with other techniques, such as prompt enhancing and prompt weighting, to get the results you want.</p> <p data-svelte-h="svelte-6johch">This guide will show you how you can use these prompt techniques to generate high-quality images with lower effort and adjust the weight of certain keywords in a prompt.</p> <h2 class="relative group"><a id="prompt-engineering" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-engineering"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt engineering</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-18ymcu1">This is not an exhaustive guide on prompt engineering, but it will help you understand the necessary parts of a good prompt. We encourage you to continue experimenting with different prompts and combine them in new ways to see what works best. As you write more prompts, you’ll develop an intuition for what works and what doesn’t!</p></div> <p data-svelte-h="svelte-1c7rxt1">New diffusion models do a pretty good job of generating high-quality images from a basic prompt, but it is still important to create a well-written prompt to get the best results. Here are a few tips for writing a good prompt:</p> <ol data-svelte-h="svelte-1cs1rnw"><li>What is the image <em>medium</em>? Is it a photo, a painting, a 3D illustration, or something else?</li> <li>What is the image <em>subject</em>? Is it a person, animal, object, or scene?</li> <li>What <em>details</em> would you like to see in the image? This is where you can get really creative and have a lot of fun experimenting with different words to bring your image to life. For example, what is the lighting like? What is the vibe and aesthetic? What kind of art or illustration style are you looking for? 
The more specific and precise words you use, the better the model will understand what you want to generate.</li></ol> <div class="flex gap-4" data-svelte-h="svelte-ahartp"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/plain-prompt.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"A photo of a banana-shaped couch in a living room"</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"A vibrant yellow banana-shaped couch sits in a cozy living room, its curve cradling a pile of colorful cushions. on the wooden floor, a patterned rug adds a touch of eclectic charm, and a potted plant sits in the corner, reaching towards the sunlight filtering through the windows"</figcaption></div></div> <h2 class="relative group"><a id="prompt-enhancing-with-gpt2" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-enhancing-with-gpt2"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Prompt enhancing with GPT2</span></h2> <p data-svelte-h="svelte-dttsfg">Prompt enhancing is a technique for quickly improving prompt quality without spending too much effort constructing one. It uses a model like GPT2 pretrained on Stable Diffusion text prompts to automatically enrich a prompt with additional important keywords to generate high-quality images.</p> <p data-svelte-h="svelte-j8rp3s">The technique works by curating a list of specific keywords and forcing the model to generate those words to enhance the original prompt. This way, your prompt can be “a cat” and GPT2 can enhance the prompt to “cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain quality sharp focus beautiful detailed intricate stunning amazing epic”.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-14yr5uj">You should also use an <a href="https://www.crosslabs.org//blog/diffusion-with-offset-noise" rel="nofollow"><em>offset noise</em></a> LoRA to improve the contrast in bright and dark images and create better lighting overall. 
This <a href="https://hf.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_offset_example-lora_1.0.safetensors" rel="nofollow">LoRA</a> is available from <a href="https://hf.co/stabilityai/stable-diffusion-xl-base-1.0" rel="nofollow">stabilityai/stable-diffusion-xl-base-1.0</a>.</p></div> <p data-svelte-h="svelte-l7cdn9">Start by defining certain styles and a list of words (you can check out a more comprehensive list of <a href="https://hf.co/LykosAI/GPT-Prompt-Expansion-Fooocus-v2/blob/main/positive.txt" rel="nofollow">words</a> and <a href="https://github.com/lllyasviel/Fooocus/tree/main/sdxl_styles" rel="nofollow">styles</a> used by Fooocus) to enhance a prompt with.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> GenerationConfig, GPT2LMHeadModel, GPT2Tokenizer, LogitsProcessor, LogitsProcessorList | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline | |
| styles = { | |
| <span class="hljs-string">"cinematic"</span>: <span class="hljs-string">"cinematic film still of {prompt}, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</span>, | |
| <span class="hljs-string">"anime"</span>: <span class="hljs-string">"anime artwork of {prompt}, anime style, key visual, vibrant, studio anime, highly detailed"</span>, | |
| <span class="hljs-string">"photographic"</span>: <span class="hljs-string">"cinematic photo of {prompt}, 35mm photograph, film, professional, 4k, highly detailed"</span>, | |
| <span class="hljs-string">"comic"</span>: <span class="hljs-string">"comic of {prompt}, graphic illustration, comic art, graphic novel art, vibrant, highly detailed"</span>, | |
| <span class="hljs-string">"lineart"</span>: <span class="hljs-string">"line art drawing {prompt}, professional, sleek, modern, minimalist, graphic, line art, vector graphics"</span>, | |
| <span class="hljs-string">"pixelart"</span>: <span class="hljs-string">" pixel-art {prompt}, low-res, blocky, pixel art style, 8-bit graphics"</span>, | |
| } | |
| words = [ | |
| <span class="hljs-string">"aesthetic"</span>, <span class="hljs-string">"astonishing"</span>, <span class="hljs-string">"beautiful"</span>, <span class="hljs-string">"breathtaking"</span>, <span class="hljs-string">"composition"</span>, <span class="hljs-string">"contrasted"</span>, <span class="hljs-string">"epic"</span>, <span class="hljs-string">"moody"</span>, <span class="hljs-string">"enhanced"</span>, | |
| <span class="hljs-string">"exceptional"</span>, <span class="hljs-string">"fascinating"</span>, <span class="hljs-string">"flawless"</span>, <span class="hljs-string">"glamorous"</span>, <span class="hljs-string">"glorious"</span>, <span class="hljs-string">"illumination"</span>, <span class="hljs-string">"impressive"</span>, <span class="hljs-string">"improved"</span>, | |
| <span class="hljs-string">"inspirational"</span>, <span class="hljs-string">"magnificent"</span>, <span class="hljs-string">"majestic"</span>, <span class="hljs-string">"hyperrealistic"</span>, <span class="hljs-string">"smooth"</span>, <span class="hljs-string">"sharp"</span>, <span class="hljs-string">"focus"</span>, <span class="hljs-string">"stunning"</span>, <span class="hljs-string">"detailed"</span>, | |
| <span class="hljs-string">"intricate"</span>, <span class="hljs-string">"dramatic"</span>, <span class="hljs-string">"high"</span>, <span class="hljs-string">"quality"</span>, <span class="hljs-string">"perfect"</span>, <span class="hljs-string">"light"</span>, <span class="hljs-string">"ultra"</span>, <span class="hljs-string">"highly"</span>, <span class="hljs-string">"radiant"</span>, <span class="hljs-string">"satisfying"</span>, | |
| <span class="hljs-string">"soothing"</span>, <span class="hljs-string">"sophisticated"</span>, <span class="hljs-string">"stylish"</span>, <span class="hljs-string">"sublime"</span>, <span class="hljs-string">"terrific"</span>, <span class="hljs-string">"touching"</span>, <span class="hljs-string">"timeless"</span>, <span class="hljs-string">"wonderful"</span>, <span class="hljs-string">"unbelievable"</span>, | |
| <span class="hljs-string">"elegant"</span>, <span class="hljs-string">"awesome"</span>, <span class="hljs-string">"amazing"</span>, <span class="hljs-string">"dynamic"</span>, <span class="hljs-string">"trendy"</span>, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-i0sn10">You may have noticed in the <code>words</code> list, there are certain words that can be paired together to create something more meaningful. For example, the words “high” and “quality” can be combined to create “high quality”. Let’s pair these words together and remove the words that can’t be paired.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->word_pairs = [<span class="hljs-string">"highly detailed"</span>, <span class="hljs-string">"high quality"</span>, <span class="hljs-string">"enhanced quality"</span>, <span class="hljs-string">"perfect composition"</span>, <span class="hljs-string">"dynamic light"</span>] | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">find_and_order_pairs</span>(<span class="hljs-params">s, pairs</span>): | |
| words = s.split() | |
| found_pairs = [] | |
| <span class="hljs-keyword">for</span> pair <span class="hljs-keyword">in</span> pairs: | |
| pair_words = pair.split() | |
| <span class="hljs-keyword">if</span> pair_words[<span class="hljs-number">0</span>] <span class="hljs-keyword">in</span> words <span class="hljs-keyword">and</span> pair_words[<span class="hljs-number">1</span>] <span class="hljs-keyword">in</span> words: | |
| found_pairs.append(pair) | |
| words.remove(pair_words[<span class="hljs-number">0</span>]) | |
| words.remove(pair_words[<span class="hljs-number">1</span>]) | |
| <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> words[:]: | |
| <span class="hljs-keyword">for</span> pair <span class="hljs-keyword">in</span> pairs: | |
| <span class="hljs-keyword">if</span> word <span class="hljs-keyword">in</span> pair.split(): | |
| words.remove(word) | |
| <span class="hljs-keyword">break</span> | |
| ordered_pairs = <span class="hljs-string">", "</span>.join(found_pairs) | |
| remaining_s = <span class="hljs-string">", "</span>.join(words) | |
| <span class="hljs-keyword">return</span> ordered_pairs, remaining_s<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-117it8a">Next, implement a custom <a href="https://huggingface.co/docs/transformers/main/en/internal/generation_utils#transformers.LogitsProcessor" rel="nofollow">LogitsProcessor</a> class that assigns tokens in the <code>words</code> list a value of 0 and assigns tokens not in the <code>words</code> list a negative value so they aren’t picked during generation. This way, generation is biased towards words in the <code>words</code> list. After a word from the list is used, it is also assigned a negative value so it isn’t picked again.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomLogitsProcessor</span>(<span 
class="hljs-title class_ inherited__">LogitsProcessor</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, bias</span>): | |
| <span class="hljs-built_in">super</span>().__init__() | |
| self.bias = bias | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__call__</span>(<span class="hljs-params">self, input_ids, scores</span>): | |
| <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(input_ids.shape) == <span class="hljs-number">2</span>: | |
| last_token_id = input_ids[<span class="hljs-number">0</span>, -<span class="hljs-number">1</span>] | |
| self.bias[last_token_id] = -<span class="hljs-number">1e10</span> | |
| <span class="hljs-keyword">return</span> scores + self.bias | |
| word_ids = [tokenizer.encode(word, add_prefix_space=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> words] | |
| bias = torch.full((tokenizer.vocab_size,), -<span class="hljs-built_in">float</span>(<span class="hljs-string">"Inf"</span>)).to(<span class="hljs-string">"cuda"</span>) | |
| bias[word_ids] = <span class="hljs-number">0</span> | |
| processor = CustomLogitsProcessor(bias) | |
| processor_list = LogitsProcessorList([processor])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3e511v">Combine the prompt and the <code>cinematic</code> style prompt defined in the <code>styles</code> dictionary earlier.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a cat basking in the sun on a roof in Turkey"</span> | |
| style = <span class="hljs-string">"cinematic"</span> | |
| prompt = styles[style].<span class="hljs-built_in">format</span>(prompt=prompt) | |
| prompt | |
| <span class="hljs-string">"cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1kowtsg">Load a GPT2 tokenizer and model from the <a href="https://huggingface.co/Gustavosta/MagicPrompt-Stable-Diffusion" rel="nofollow">Gustavosta/MagicPrompt-Stable-Diffusion</a> checkpoint (this specific checkpoint is trained to generate prompts) to enhance the prompt.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer = GPT2Tokenizer.from_pretrained(<span class="hljs-string">"Gustavosta/MagicPrompt-Stable-Diffusion"</span>) | |
| model = GPT2LMHeadModel.from_pretrained(<span class="hljs-string">"Gustavosta/MagicPrompt-Stable-Diffusion"</span>, torch_dtype=torch.float16).to( | |
| <span class="hljs-string">"cuda"</span> | |
| ) | |
| model.<span class="hljs-built_in">eval</span>() | |
| inputs = tokenizer(prompt, return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>) | |
| token_count = inputs[<span class="hljs-string">"input_ids"</span>].shape[<span class="hljs-number">1</span>] | |
| max_new_tokens = <span class="hljs-number">50</span> - token_count | |
| generation_config = GenerationConfig( | |
| penalty_alpha=<span class="hljs-number">0.7</span>, | |
| top_k=<span class="hljs-number">50</span>, | |
| eos_token_id=model.config.eos_token_id, | |
| pad_token_id=model.config.eos_token_id, | |
| pad_token=model.config.pad_token_id, | |
| do_sample=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| generated_ids = model.generate( | |
| input_ids=inputs[<span class="hljs-string">"input_ids"</span>], | |
| attention_mask=inputs[<span class="hljs-string">"attention_mask"</span>], | |
| max_new_tokens=max_new_tokens, | |
| generation_config=generation_config, | |
| logits_processor=processor_list, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-r4gy79">Then you can combine the input prompt and the generated prompt. Feel free to take a look at what the generated prompt (<code>generated_part</code>) is, the word pairs that were found (<code>pairs</code>), and the remaining words (<code>words</code>). This is all packed together in the <code>enhanced_prompt</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->output_tokens = [tokenizer.decode(generated_id, skip_special_tokens=<span class="hljs-literal">True</span>) <span class="hljs-keyword">for</span> generated_id <span class="hljs-keyword">in</span> generated_ids] | |
| input_part, generated_part = output_tokens[<span class="hljs-number">0</span>][: <span class="hljs-built_in">len</span>(prompt)], output_tokens[<span class="hljs-number">0</span>][<span class="hljs-built_in">len</span>(prompt) :] | |
| pairs, words = find_and_order_pairs(generated_part, word_pairs) | |
| formatted_generated_part = pairs + <span class="hljs-string">", "</span> + words | |
| enhanced_prompt = input_part + <span class="hljs-string">", "</span> + formatted_generated_part | |
| enhanced_prompt | |
| [<span class="hljs-string">"cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain quality sharp focus beautiful detailed intricate stunning amazing epic"</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14szpok">Finally, load a pipeline and the offset noise LoRA with a <em>low weight</em> to generate an image with the enhanced prompt.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = StableDiffusionXLPipeline.from_pretrained( | |
| <span class="hljs-string">"RunDiffusion/Juggernaut-XL-v9"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline.load_lora_weights( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| weight_name=<span class="hljs-string">"sd_xl_offset_example-lora_1.0.safetensors"</span>, | |
| adapter_name=<span class="hljs-string">"offset"</span>, | |
| ) | |
| pipeline.set_adapters([<span class="hljs-string">"offset"</span>], adapter_weights=[<span class="hljs-number">0.2</span>]) | |
| image = pipeline( | |
| enhanced_prompt, | |
| width=<span class="hljs-number">1152</span>, | |
| height=<span class="hljs-number">896</span>, | |
| guidance_scale=<span class="hljs-number">7.5</span>, | |
| num_inference_steps=<span class="hljs-number">25</span>, | |
| ).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-12k1k80"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/non-enhanced-prompt.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"a cat basking in the sun on a roof in Turkey"</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/enhanced-prompt.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</figcaption></div></div> <h2 class="relative group"><a id="prompt-weighting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-weighting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt weighting</span></h2> <p data-svelte-h="svelte-1tre73w">Prompt weighting provides a way to emphasize or de-emphasize certain parts of a prompt, allowing for more control over the generated image. 
A prompt can include several concepts, which get turned into contextualized text embeddings. The embeddings are used by the model to condition its cross-attention layers to generate an image (read the Stable Diffusion <a href="https://huggingface.co/blog/stable_diffusion" rel="nofollow">blog post</a> to learn more about how it works).</p> <p data-svelte-h="svelte-1vdigbx">Prompt weighting works by increasing or decreasing the scale of the text embedding vector that corresponds to its concept in the prompt because you may not necessarily want the model to focus on all concepts equally. The easiest way to prepare the prompt-weighted embeddings is to use <a href="https://github.com/damian0815/compel" rel="nofollow">Compel</a>, a text prompt-weighting and blending library. Once you have the prompt-weighted embeddings, you can pass them to any pipeline that has a <a href="https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline.__call__.prompt_embeds" rel="nofollow"><code>prompt_embeds</code></a> (and optionally <a href="https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline.__call__.negative_prompt_embeds" rel="nofollow"><code>negative_prompt_embeds</code></a>) parameter, such as <a href="/docs/diffusers/pr_10567/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>, <a href="/docs/diffusers/pr_10567/en/api/pipelines/controlnet#diffusers.StableDiffusionControlNetPipeline">StableDiffusionControlNetPipeline</a>, and <a href="/docs/diffusers/pr_10567/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline">StableDiffusionXLPipeline</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 
dark:text-gray-400"><p data-svelte-h="svelte-nya19q">If your favorite pipeline doesn’t have a <code>prompt_embeds</code> parameter, please open an <a href="https://github.com/huggingface/diffusers/issues/new/choose" rel="nofollow">issue</a> so we can add it!</p></div> <p data-svelte-h="svelte-xw5jy8">This guide will show you how to weight and blend your prompts with Compel in 🤗 Diffusers.</p> <p data-svelte-h="svelte-1tbon0u">Before you begin, make sure you have the latest version of Compel installed:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># uncomment to install in Colab</span> | |
| <span class="hljs-comment">#!pip install compel --upgrade</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pzjol8">For this guide, let’s generate an image with the prompt <code>"a red cat playing with a ball"</code> using the <a href="/docs/diffusers/pr_10567/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline, UniPCMultistepScheduler | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = StableDiffusionPipeline.from_pretrained(<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>, use_safetensors=<span class="hljs-literal">True</span>) | |
| pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) | |
| pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a red cat playing with a ball"</span> | |
| generator = torch.Generator(device=<span class="hljs-string">"cpu"</span>).manual_seed(<span class="hljs-number">33</span>) | |
| image = pipe(prompt, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1brza9b"><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/compel/forest_0.png"></div> <h3 class="relative group"><a id="weighting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#weighting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Weighting</span></h3> <p data-svelte-h="svelte-137cdqc">You’ll notice there is no “ball” in the image! Let’s use compel to upweight the concept of “ball” in the prompt. 
Create a <a href="https://github.com/damian0815/compel/blob/main/doc/compel.md#compel-objects" rel="nofollow"><code>Compel</code></a> object, and pass it a tokenizer and text encoder:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> compel <span class="hljs-keyword">import</span> Compel | |
| compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18ay2r3">compel uses <code>+</code> or <code>-</code> to increase or decrease the weight of a word in the prompt. To increase the weight of “ball”:</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1dn4vb7"><code>+</code> corresponds to the value <code>1.1</code>, <code>++</code> corresponds to <code>1.1^2</code>, and so on. Similarly, <code>-</code> corresponds to <code>0.9</code> and <code>--</code> corresponds to <code>0.9^2</code>. Feel free to experiment with adding more <code>+</code> or <code>-</code> in your prompt!</p></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a red cat playing with a ball++"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vulem2">Pass the prompt to <code>compel_proc</code> to create the new prompt embeddings which are passed to the pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt_embeds = compel_proc(prompt) | |
| generator = torch.manual_seed(<span class="hljs-number">33</span>) | |
| image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-17zkpq4"><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/compel/forest_1.png"></div> <p data-svelte-h="svelte-gxkdpi">To downweight parts of the prompt, use the <code>-</code> suffix:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a red------- cat playing with a ball"</span> | |
| prompt_embeds = compel_proc(prompt) | |
| generator = torch.manual_seed(<span class="hljs-number">33</span>) | |
| image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1d7lfen"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-neg.png"></div> <p data-svelte-h="svelte-1f6exw0">You can even up or downweight multiple concepts in the same prompt:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a red cat++ playing with a ball----"</span> | |
| prompt_embeds = compel_proc(prompt) | |
| generator = torch.manual_seed(<span class="hljs-number">33</span>) | |
| image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1qezu4q"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-pos-neg.png"></div> <h3 class="relative group"><a id="blending" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#blending"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Blending</span></h3> <p data-svelte-h="svelte-1oxm60">You can also create a weighted <em>blend</em> of prompts by adding <code>.blend()</code> to a list of prompts and passing it some weights. 
Your blend may not always produce the result you expect because it breaks some assumptions about how the text encoder functions, so just have fun and experiment with it!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt_embeds = compel_proc(<span class="hljs-string">'("a red cat playing with a ball", "jungle").blend(0.7, 0.8)'</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">33</span>) | |
| image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-8v41o8"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-blend.png"></div> <h3 class="relative group"><a id="conjunction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conjunction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conjunction</span></h3> <p data-svelte-h="svelte-tjkp7t">A conjunction diffuses each prompt independently and concatenates their results by their weighted sum. 
Add <code>.and()</code> to the end of a list of prompts to create a conjunction:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt_embeds = compel_proc(<span class="hljs-string">'["a red cat", "playing with a", "ball"].and()'</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">55</span>) | |
| image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=<span class="hljs-number">20</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1ycuwub"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-conj.png"></div> <h3 class="relative group"><a id="textual-inversion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#textual-inversion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Textual inversion</span></h3> <p data-svelte-h="svelte-lcas44"><a href="../training/text_inversion">Textual inversion</a> is a technique for learning a specific concept from some images which you can use to generate new images conditioned on that concept.</p> <p data-svelte-h="svelte-vnqyeq">Create a pipeline and use the <a href="/docs/diffusers/pr_10567/en/api/loaders/textual_inversion#diffusers.loaders.TextualInversionLoaderMixin.load_textual_inversion">load_textual_inversion()</a> function to load the textual inversion embeddings (feel free to browse the <a href="https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer" rel="nofollow">Stable Diffusion Conceptualizer</a> for 100+ trained 
concepts):</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">from</span> compel <span class="hljs-keyword">import</span> Compel, DiffusersTextualInversionManager | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, | |
| use_safetensors=<span class="hljs-literal">True</span>, variant=<span class="hljs-string">"fp16"</span>).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.load_textual_inversion(<span class="hljs-string">"sd-concepts-library/midjourney-style"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-h0g1y9">Compel provides a <code>DiffusersTextualInversionManager</code> class to simplify prompt weighting with textual inversion. Instantiate <code>DiffusersTextualInversionManager</code> and pass it to the <code>Compel</code> class:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->textual_inversion_manager = DiffusersTextualInversionManager(pipe) | |
| compel_proc = Compel( | |
| tokenizer=pipe.tokenizer, | |
| text_encoder=pipe.text_encoder, | |
| textual_inversion_manager=textual_inversion_manager)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fkpura">Incorporate the concept to condition a prompt using the <code>&lt;concept&gt;</code> syntax:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt_embeds = compel_proc(<span class="hljs-string">'("A red cat++ playing with a ball &lt;midjourney-style&gt;")'</span>) | |
| image = pipe(prompt_embeds=prompt_embeds).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1awripq"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-text-inversion.png"></div> <h3 class="relative group"><a id="dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DreamBooth</span></h3> <p data-svelte-h="svelte-uspz8q"><a href="../training/dreambooth">DreamBooth</a> is a technique for generating contextualized images of a subject given just a few images of the subject to train on. It is similar to textual inversion, but DreamBooth trains the full model whereas textual inversion only fine-tunes the text embeddings. 
This means you should use <a href="/docs/diffusers/pr_10567/en/api/pipelines/overview#diffusers.DiffusionPipeline.from_pretrained">from_pretrained()</a> to load the DreamBooth model (feel free to browse the <a href="https://huggingface.co/sd-dreambooth-library" rel="nofollow">Stable Diffusion Dreambooth Concepts Library</a> for 100+ trained models):</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, UniPCMultistepScheduler | |
| <span class="hljs-keyword">from</span> compel <span class="hljs-keyword">import</span> Compel | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"sd-dreambooth-library/dndcoverart-v1"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1le33cx">Create a <code>Compel</code> class with a tokenizer and text encoder, and pass your prompt to it. Depending on the model you use, you’ll need to incorporate the model’s unique identifier into your prompt. For example, the <code>dndcoverart-v1</code> model uses the identifier <code>dndcoverart</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder) | |
| prompt_embeds = compel_proc(<span class="hljs-string">'("magazine cover of a dndcoverart dragon, high quality, intricate details, larry elmore art style").and()'</span>) | |
| image = pipe(prompt_embeds=prompt_embeds).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-zvft88"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/compel-dreambooth.png"></div> <h3 class="relative group"><a id="stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL</span></h3> <p data-svelte-h="svelte-xo93eu">Stable Diffusion XL (SDXL) has two tokenizers and text encoders so its usage is a bit different. 
To address this, you should pass both tokenizers and encoders to the <code>Compel</code> class:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> compel <span class="hljs-keyword">import</span> Compel, ReturnedEmbeddingsType | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| variant=<span class="hljs-string">"fp16"</span>, | |
| use_safetensors=<span class="hljs-literal">True</span>, | |
| torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| compel = Compel( | |
| tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2] , | |
| text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2], | |
| returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, | |
| requires_pooled=[<span class="hljs-literal">False</span>, <span class="hljs-literal">True</span>] | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-dnd5a9">This time, let’s upweight “ball” by a factor of 1.5 for the first prompt, and downweight “ball” by 0.6 for the second prompt. The <a href="/docs/diffusers/pr_10567/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline">StableDiffusionXLPipeline</a> also requires <a href="https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLInpaintPipeline.__call__.pooled_prompt_embeds" rel="nofollow"><code>pooled_prompt_embeds</code></a> (and optionally <a href="https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLInpaintPipeline.__call__.negative_pooled_prompt_embeds" rel="nofollow"><code>negative_pooled_prompt_embeds</code></a>) so you should pass those to the pipeline along with the conditioning tensors:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 
border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># apply weights</span> | |
| prompt = [<span class="hljs-string">"a red cat playing with a (ball)1.5"</span>, <span class="hljs-string">"a red cat playing with a (ball)0.6"</span>] | |
| conditioning, pooled = compel(prompt) | |
| <span class="hljs-comment"># generate image</span> | |
| generator = [torch.Generator().manual_seed(<span class="hljs-number">33</span>) <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(prompt))] | |
| images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=<span class="hljs-number">30</span>).images | |
| make_image_grid(images, rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex gap-4" data-svelte-h="svelte-idvlgw"><div><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/compel/sdxl_ball1.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"a red cat playing with a (ball)1.5"</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/compel/sdxl_ball2.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">"a red cat playing with a (ball)0.6"</figcaption></div></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/weighted_prompts.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
// Client-side bootstrap for this page: publishes path settings, loads the two
// entry modules, and hands the surrounding DOM element to the runtime's start().
// The bare braces keep the `const` bindings below out of the script's top-level scope.
| { | |
// Assigned without let/const/var in a classic (non-module) script, so this
// creates a global. It carries the asset and base path prefixes plus an empty
// env object. NOTE(review): presumably read by the imported modules under this
// exact name, and the suffix looks build-generated; do not rename by hand.
| __sveltekit_inhvqu = { | |
| assets: "/docs/diffusers/pr_10567/en", | |
| base: "/docs/diffusers/pr_10567/en", | |
| env: {} | |
| }; | |
// document.currentScript is only set while this script runs synchronously, so
// the parent element is captured here, before the asynchronous imports resolve.
| const element = document.currentScript.parentElement; | |
// Two null entries; presumably one slot per id in node_ids below (TODO confirm).
| const data = [null,null]; | |
// Fetch both entry modules in parallel; start() runs only once both have loaded.
// These are the same start/app URLs that the page head lists as modulepreload hints.
| Promise.all([ | |
| import("/docs/diffusers/pr_10567/en/_app/immutable/entry/start.5ab964f0.js"), | |
| import("/docs/diffusers/pr_10567/en/_app/immutable/entry/app.d83dbfce.js") | |
| ]).then(([kit, app]) => { | |
// Start the app on the captured element with the node ids and data for this
// page; form and error are passed as null.
| kit.start(app, element, { | |
| node_ids: [0, 269], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 82.1 kB
- Xet hash:
- 3d705b37b330f6e599cc5a713f39564e95b8b76dbbe08d3449d974a33ec719db
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.