Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Textual Inversion","local":"textual-inversion","sections":[{"title":"Script parameters","local":"script-parameters","sections":[],"depth":2},{"title":"Training script","local":"training-script","sections":[],"depth":2},{"title":"Launch the script","local":"launch-the-script","sections":[],"depth":2},{"title":"Next steps","local":"next-steps","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/diffusers/pr_11739/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/entry/start.56da5b2b.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/scheduler.53228c21.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/singletons.70d778ba.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/index.e93d0901.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/paths.a0989064.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/entry/app.794fd245.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/preload-helper.b5c24ab3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/index.100fac89.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/nodes/0.d68fe2c3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/nodes/318.bbf03424.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/CopyLLMTxtMenu.ed0e3681.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/IconCopy.38cf8f56.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.dd42f483.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_11739/en/_app/immutable/chunks/CodeBlock.d30a6509.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Textual Inversion","local":"textual-inversion","sections":[{"title":"Script parameters","local":"script-parameters","sections":[],"depth":2},{"title":"Training script","local":"training-script","sections":[],"depth":2},{"title":"Launch the script","local":"launch-the-script","sections":[],"depth":2},{"title":"Next steps","local":"next-steps","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="textual-inversion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#textual-inversion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Textual Inversion</span></h1> <p data-svelte-h="svelte-vnm0dn"><a href="https://hf.co/papers/2208.01618" rel="nofollow">Textual Inversion</a> is a training technique for personalizing image generation models with just a few example images of what you want it to learn. This technique works by learning and updating the text embeddings (the new embeddings are tied to a special word you must use in the prompt) to match the example images you provide.</p> <p data-svelte-h="svelte-1ns9xvu">If you’re training on a GPU with limited vRAM, you should try enabling the <code>gradient_checkpointing</code> and <code>mixed_precision</code> parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with <a href="../optimization/xformers">xFormers</a>.</p> <p data-svelte-h="svelte-vv5nfp">This guide will explore the <a href="https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py" rel="nofollow">textual_inversion.py</a> script to help you become more familiar with it, and how you can adapt it for your own use-case.</p> <p data-svelte-h="svelte-l7dm2q">Before running the script, make sure you install the library from source:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->git <span class="hljs-built_in">clone</span> https://github.com/huggingface/diffusers | |
| <span class="hljs-built_in">cd</span> diffusers | |
| pip install .<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1nancx8">Navigate to the example folder with the training script and install the required dependencies for the script you’re using:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> examples/textual_inversion | |
| pip install -r requirements.txt<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-1in84qc"><p>🤗 Accelerate is a library for helping you train on multiple GPUs/TPUs or with mixed-precision. It’ll automatically configure your training setup based on your hardware and environment. Take a look at the 🤗 Accelerate <a href="https://huggingface.co/docs/accelerate/quicktour" rel="nofollow">Quick tour</a> to learn more.</p></blockquote> <p data-svelte-h="svelte-60q53m">Initialize an 🤗 Accelerate environment:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-di6juu">To setup a default 🤗 Accelerate environment without choosing any configurations:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config default<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-tsz4qp">Or if your environment doesn’t support an interactive shell, like a notebook, you can use:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate.utils <span class="hljs-keyword">import</span> write_basic_config | |
| write_basic_config()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fkfdql">Lastly, if you want to train a model on your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide to learn how to create a dataset that works with the training script.</p> <blockquote class="tip" data-svelte-h="svelte-upgvrc"><p>The following sections highlight parts of the training script that are important for understanding how to modify it, but it doesn’t cover every aspect of the script in detail. If you’re interested in learning more, feel free to read through the <a href="https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py" rel="nofollow">script</a> and let us know if you have any questions or concerns.</p></blockquote> <h2 class="relative group"><a id="script-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#script-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Script parameters</span></h2> <p data-svelte-h="svelte-1mhz0qu">The training script has many parameters to help you tailor the training run to your needs. All of the parameters and their descriptions are listed in the <a href="https://github.com/huggingface/diffusers/blob/839c2a5ece0af4e75530cb520d77bc7ed8acf474/examples/textual_inversion/textual_inversion.py#L176" rel="nofollow"><code>parse_args()</code></a> function. Where applicable, Diffusers provides default values for each parameter such as the training batch size and learning rate, but feel free to change these values in the training command if you’d like.</p> <p data-svelte-h="svelte-g9gzy9">For example, to increase the number of gradient accumulation steps above the default value of 1:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch textual_inversion.py \ | |
| --gradient_accumulation_steps=4<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-tgclk5">Some other basic and important parameters to specify include:</p> <ul data-svelte-h="svelte-eyv1zk"><li><code>--pretrained_model_name_or_path</code>: the name of the model on the Hub or a local path to the pretrained model</li> <li><code>--train_data_dir</code>: path to a folder containing the training dataset (example images)</li> <li><code>--output_dir</code>: where to save the trained model</li> <li><code>--push_to_hub</code>: whether to push the trained model to the Hub</li> <li><code>--checkpointing_steps</code>: frequency of saving a checkpoint as the model trains; this is useful if for some reason training is interrupted, you can continue training from that checkpoint by adding <code>--resume_from_checkpoint</code> to your training command</li> <li><code>--num_vectors</code>: the number of vectors to learn the embeddings with; increasing this parameter helps the model learn better but it comes with increased training costs</li> <li><code>--placeholder_token</code>: the special word to tie the learned embeddings to (you must use the word in your prompt for inference)</li> <li><code>--initializer_token</code>: a single-word that roughly describes the object or style you’re trying to train on</li> <li><code>--learnable_property</code>: whether you’re training the model to learn a new “style” (for example, Van Gogh’s painting style) or “object” (for example, your dog)</li></ul> <h2 class="relative group"><a id="training-script" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-script"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training script</span></h2> <p data-svelte-h="svelte-qs703o">Unlike some of the other training scripts, textual_inversion.py has a custom dataset class, <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L487" rel="nofollow"><code>TextualInversionDataset</code></a> for creating a dataset. You can customize the image size, placeholder token, interpolation method, whether to crop the image, and more. If you need to change how the dataset is created, you can modify <code>TextualInversionDataset</code>.</p> <p data-svelte-h="svelte-k3h2bp">Next, you’ll find the dataset preprocessing code and training loop in the <a href="https://github.com/huggingface/diffusers/blob/839c2a5ece0af4e75530cb520d77bc7ed8acf474/examples/textual_inversion/textual_inversion.py#L573" rel="nofollow"><code>main()</code></a> function.</p> <p data-svelte-h="svelte-4766ss">The script starts by loading the <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L616" rel="nofollow">tokenizer</a>, <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L622" rel="nofollow">scheduler and model</a>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Load tokenizer</span> | |
| <span class="hljs-keyword">if</span> args.tokenizer_name: | |
| tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) | |
| <span class="hljs-keyword">elif</span> args.pretrained_model_name_or_path: | |
| tokenizer = CLIPTokenizer.from_pretrained(args.pretrained_model_name_or_path, subfolder=<span class="hljs-string">"tokenizer"</span>) | |
| <span class="hljs-comment"># Load scheduler and models</span> | |
| noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder=<span class="hljs-string">"scheduler"</span>) | |
| text_encoder = CLIPTextModel.from_pretrained( | |
| args.pretrained_model_name_or_path, subfolder=<span class="hljs-string">"text_encoder"</span>, revision=args.revision | |
| ) | |
| vae = AutoencoderKL.from_pretrained(args.pretrained_model_name_or_path, subfolder=<span class="hljs-string">"vae"</span>, revision=args.revision) | |
| unet = UNet2DConditionModel.from_pretrained( | |
| args.pretrained_model_name_or_path, subfolder=<span class="hljs-string">"unet"</span>, revision=args.revision | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16cbi1f">The special <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L632" rel="nofollow">placeholder token</a> is added next to the tokenizer, and the embedding is readjusted to account for the new token.</p> <p data-svelte-h="svelte-70930c">Then, the script <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L716" rel="nofollow">creates a dataset</a> from the <code>TextualInversionDataset</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->train_dataset = TextualInversionDataset( | |
| data_root=args.train_data_dir, | |
| tokenizer=tokenizer, | |
| size=args.resolution, | |
| placeholder_token=(<span class="hljs-string">" "</span>.join(tokenizer.convert_ids_to_tokens(placeholder_token_ids))), | |
| repeats=args.repeats, | |
| learnable_property=args.learnable_property, | |
| center_crop=args.center_crop, | |
| <span class="hljs-built_in">set</span>=<span class="hljs-string">"train"</span>, | |
| ) | |
| train_dataloader = torch.utils.data.DataLoader( | |
| train_dataset, batch_size=args.train_batch_size, shuffle=<span class="hljs-literal">True</span>, num_workers=args.dataloader_num_workers | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mc4ymd">Finally, the <a href="https://github.com/huggingface/diffusers/blob/b81c69e489aad3a0ba73798c459a33990dc4379c/examples/textual_inversion/textual_inversion.py#L784" rel="nofollow">training loop</a> handles everything else from predicting the noisy residual to updating the embedding weights of the special placeholder token.</p> <p data-svelte-h="svelte-6gmbd2">If you want to learn more about how the training loop works, check out the <a href="../using-diffusers/write_own_pipeline">Understanding pipelines, models and schedulers</a> tutorial which breaks down the basic pattern of the denoising process.</p> <h2 class="relative group"><a id="launch-the-script" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#launch-the-script"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Launch the script</span></h2> <p data-svelte-h="svelte-9dei1q">Once you’ve made all your changes or you’re okay with the default configuration, you’re ready to launch the training script! 🚀</p> <p data-svelte-h="svelte-1gl7574">For this guide, you’ll download some images of a <a href="https://huggingface.co/datasets/diffusers/cat_toy_example" rel="nofollow">cat toy</a> and store them in a directory. But remember, you can create and use your own dataset if you want (see the <a href="create_dataset">Create a dataset for training</a> guide).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> snapshot_download | |
| local_dir = <span class="hljs-string">"./cat"</span> | |
| snapshot_download( | |
| <span class="hljs-string">"diffusers/cat_toy_example"</span>, local_dir=local_dir, repo_type=<span class="hljs-string">"dataset"</span>, ignore_patterns=<span class="hljs-string">".gitattributes"</span> | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k1bp7z">Set the environment variable <code>MODEL_NAME</code> to a model id on the Hub or a path to a local model, and <code>DATA_DIR</code> to the path where you just downloaded the cat images to. The script creates and saves the following files to your repository:</p> <ul data-svelte-h="svelte-17m22mk"><li><code>learned_embeds.bin</code>: the learned embedding vectors corresponding to your example images</li> <li><code>token_identifier.txt</code>: the special placeholder token</li> <li><code>type_of_concept.txt</code>: the type of concept you’re training on (either “object” or “style”)</li></ul> <blockquote class="warning" data-svelte-h="svelte-mhhy05"><p>A full training run takes ~1 hour on a single V100 GPU.</p></blockquote> <p data-svelte-h="svelte-iowxvh">One more thing before you launch the script. If you’re interested in following along with the training process, you can periodically save generated images as training progresses. Add the following parameters to the training command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->--validation_prompt=<span class="hljs-string">"A <cat-toy> train"</span> | |
| --num_validation_images=4 | |
| --validation_steps=100<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> DATA_DIR=<span class="hljs-string">"./cat"</span> | |
| accelerate launch textual_inversion.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --train_data_dir=<span class="hljs-variable">$DATA_DIR</span> \ | |
| --learnable_property=<span class="hljs-string">"object"</span> \ | |
| --placeholder_token=<span class="hljs-string">"<cat-toy>"</span> \ | |
| --initializer_token=<span class="hljs-string">"toy"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --max_train_steps=3000 \ | |
| --learning_rate=5.0e-04 \ | |
| --scale_lr \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --output_dir=<span class="hljs-string">"textual_inversion_cat"</span> \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pqx2cy">After training is complete, you can use your newly trained model for inference like:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = StableDiffusionPipeline.from_pretrained(<span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline.load_textual_inversion(<span class="hljs-string">"sd-concepts-library/cat-toy"</span>) | |
| image = pipeline(<span class="hljs-string">"A <cat-toy> train"</span>, num_inference_steps=<span class="hljs-number">50</span>).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"cat-train.png"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next steps</span></h2> <p data-svelte-h="svelte-18p6tri">Congratulations on training your own Textual Inversion model! 🎉 To learn more about how to use your new model, the following guides may be helpful:</p> <ul data-svelte-h="svelte-gnhmwo"><li>Learn how to <a href="../using-diffusers/textual_inversion_inference">load Textual Inversion embeddings</a> and also use them as negative embeddings.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/training/text_inversion.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_kxvna0 = { | |
| assets: "/docs/diffusers/pr_11739/en", | |
| base: "/docs/diffusers/pr_11739/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/pr_11739/en/_app/immutable/entry/start.56da5b2b.js"), | |
| import("/docs/diffusers/pr_11739/en/_app/immutable/entry/app.794fd245.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 318], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 41.1 kB
- Xet hash:
- dbc5f6467c7a48db34411ee6f1f16538bf4befa614dff8973b9a3286332cf883
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.