Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / diffusers /pr_10567 /en /using-diffusers /inference_with_tcd_lora.html

38.2 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Trajectory Consistency Distillation-LoRA","local":"trajectory-consistency-distillation-lora","sections":[{"title":"General tasks","local":"general-tasks","sections":[],"depth":2},{"title":"Community models","local":"community-models","sections":[],"depth":2},{"title":"Adapters","local":"adapters","sections":[{"title":"Depth ControlNet","local":"depth-controlnet","sections":[],"depth":3},{"title":"Canny ControlNet","local":"canny-controlnet","sections":[],"depth":3}],"depth":2}],"depth":1}">
	<link href="/docs/diffusers/pr_10567/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/entry/start.5ab964f0.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/scheduler.8c3d61f6.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/singletons.1271a703.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/index.0997d446.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/paths.af967ee5.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/entry/app.d83dbfce.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/index.da70eac4.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/nodes/0.bb4a0671.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/nodes/245.6145ac2a.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/Tip.1d9b8c37.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/CodeBlock.00a903b3.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/DocNotebookDropdown.02900f6b.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/globals.7f7f1b26.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/EditOnGithub.1e64e623.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/HfOption.c1483eb1.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_10567/en/_app/immutable/chunks/stores.d6eecc38.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Trajectory Consistency Distillation-LoRA","local":"trajectory-consistency-distillation-lora","sections":[{"title":"General tasks","local":"general-tasks","sections":[],"depth":2},{"title":"Community models","local":"community-models","sections":[],"depth":2},{"title":"Adapters","local":"adapters","sections":[{"title":"Depth ControlNet","local":"depth-controlnet","sections":[],"depth":3},{"title":"Canny ControlNet","local":"canny-controlnet","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="trajectory-consistency-distillation-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trajectory-consistency-distillation-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Trajectory Consistency Distillation-LoRA</span></h1> <p data-svelte-h="svelte-1ipiy2w">Trajectory Consistency Distillation (TCD) enables a model to generate higher quality and more detailed images with fewer steps. Moreover, owing to the effective error mitigation during the distillation process, TCD demonstrates superior performance even under conditions of large inference steps.</p> <p data-svelte-h="svelte-lghdda">The major advantages of TCD are:</p> <ul data-svelte-h="svelte-s8ifhe"><li><p>Better than Teacher: TCD demonstrates superior generative quality at both small and large inference steps and exceeds the performance of <a href="../../api/schedulers/multistep_dpm_solver">DPM-Solver++(2S)</a> with Stable Diffusion XL (SDXL). There is no additional discriminator or LPIPS supervision included during TCD training.</p></li> <li><p>Flexible Inference Steps: The inference steps for TCD sampling can be freely adjusted without adversely affecting the image quality.</p></li> <li><p>Freely change detail level: During inference, the level of detail in the image can be adjusted with a single hyperparameter, <em>gamma</em>.</p></li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1apoih8">For more technical details of TCD, please refer to the <a href="https://arxiv.org/abs/2402.19159" rel="nofollow">paper</a> or official <a href="https://mhh0318.github.io/tcd/" rel="nofollow">project page</a>).</p></div> <p data-svelte-h="svelte-ss6l19">For large models like SDXL, TCD is trained with <a href="https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora" rel="nofollow">LoRA</a> to reduce memory usage. This is also useful because you can reuse LoRAs between different finetuned models, as long as they share the same base model, without further training.</p> <p data-svelte-h="svelte-4sxfqt">This guide will show you how to perform inference with TCD-LoRAs for a variety of tasks like text-to-image and inpainting, as well as how you can easily combine TCD-LoRAs with other adapters. Choose one of the supported base model and it’s corresponding TCD-LoRA checkpoint from the table below to get started.</p> <table data-svelte-h="svelte-143krth"><thead><tr><th>Base model</th> <th>TCD-LoRA checkpoint</th></tr></thead> <tbody><tr><td><a href="https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5" rel="nofollow">stable-diffusion-v1-5</a></td> <td><a href="https://huggingface.co/h1t/TCD-SD15-LoRA" rel="nofollow">TCD-SD15</a></td></tr> <tr><td><a href="https://huggingface.co/stabilityai/stable-diffusion-2-1-base" rel="nofollow">stable-diffusion-2-1-base</a></td> <td><a href="https://huggingface.co/h1t/TCD-SD21-base-LoRA" rel="nofollow">TCD-SD21-base</a></td></tr> <tr><td><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0" rel="nofollow">stable-diffusion-xl-base-1.0</a></td> <td><a href="https://huggingface.co/h1t/TCD-SDXL-LoRA" rel="nofollow">TCD-SDXL</a></td></tr></tbody></table> <p data-svelte-h="svelte-18m9u5y">Make sure you have <a href="https://github.com/huggingface/peft" rel="nofollow">PEFT</a> installed for better LoRA support.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U peft<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="general-tasks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general-tasks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>General tasks</span></h2> <p data-svelte-h="svelte-1n6zxug">In this guide, let’s use the <a href="/docs/diffusers/pr_10567/en/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline">StableDiffusionXLPipeline</a> and the <a href="/docs/diffusers/pr_10567/en/api/schedulers/tcd#diffusers.TCDScheduler">TCDScheduler</a>. Use the <a href="/docs/diffusers/pr_10567/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> method to load the SDXL-compatible TCD-LoRA weights.</p> <p data-svelte-h="svelte-49ynuh">A few tips to keep in mind for TCD-LoRA inference are to:</p> <ul data-svelte-h="svelte-xy5eiv"><li>Keep the <code>num_inference_steps</code> between 4 and 50</li> <li>Set <code>eta</code> (used to control stochasticity at each step) between 0 and 1. You should use a higher <code>eta</code> when increasing the number of inference steps, but the downside is that a larger <code>eta</code> in <a href="/docs/diffusers/pr_10567/en/api/schedulers/tcd#diffusers.TCDScheduler">TCDScheduler</a> leads to blurrier images. A value of 0.3 is recommended to produce good results.</li></ul> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">text-to-image </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">inpainting </div></div> <div class="language-select"><div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline, TCDScheduler

	device = <span class="hljs-string">"cuda"</span>
	base_model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>
	tcd_lora_id = <span class="hljs-string">"h1t/TCD-SDXL-LoRA"</span>

	pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>).to(device)
	pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

	pipe.load_lora_weights(tcd_lora_id)
	pipe.fuse_lora()

	prompt = <span class="hljs-string">"Painting of the orange cat Otto von Garfield, Count of Bismarck-Schönhausen, Duke of Lauenburg, Minister-President of Prussia. Depicted wearing a Prussian Pickelhaube and eating his favorite meal - lasagna."</span>

	image = pipe(
	prompt=prompt,
	num_inference_steps=<span class="hljs-number">4</span>,
	guidance_scale=<span class="hljs-number">0</span>,
	eta=<span class="hljs-number">0.3</span>,
	generator=torch.Generator(device=device).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nuvmzb"><img src="https://github.com/jabir-zheng/TCD/raw/main/assets/demo_image.png"></p> </div> <h2 class="relative group"><a id="community-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#community-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Community models</span></h2> <p data-svelte-h="svelte-188pw20">TCD-LoRA also works with many community finetuned models and plugins. For example, load the <a href="https://huggingface.co/cagliostrolab/animagine-xl-3.0" rel="nofollow">animagine-xl-3.0</a> checkpoint which is a community finetuned version of SDXL for generating anime images.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline, TCDScheduler

	device = <span class="hljs-string">"cuda"</span>
	base_model_id = <span class="hljs-string">"cagliostrolab/animagine-xl-3.0"</span>
	tcd_lora_id = <span class="hljs-string">"h1t/TCD-SDXL-LoRA"</span>

	pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>).to(device)
	pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

	pipe.load_lora_weights(tcd_lora_id)
	pipe.fuse_lora()

	prompt = <span class="hljs-string">"A man, clad in a meticulously tailored military uniform, stands with unwavering resolve. The uniform boasts intricate details, and his eyes gleam with determination. Strands of vibrant, windswept hair peek out from beneath the brim of his cap."</span>

	image = pipe(
	prompt=prompt,
	num_inference_steps=<span class="hljs-number">8</span>,
	guidance_scale=<span class="hljs-number">0</span>,
	eta=<span class="hljs-number">0.3</span>,
	generator=torch.Generator(device=device).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-iffd98"><img src="https://github.com/jabir-zheng/TCD/raw/main/assets/animagine_xl.png"></p> <p data-svelte-h="svelte-1bzy2jr">TCD-LoRA also supports other LoRAs trained on different styles. For example, let’s load the <a href="https://huggingface.co/TheLastBen/Papercut_SDXL" rel="nofollow">TheLastBen/Papercut_SDXL</a> LoRA and fuse it with the TCD-LoRA with the <code>~loaders.UNet2DConditionLoadersMixin.set_adapters</code> method.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-4z0umm">Check out the <a href="merge_loras">Merge LoRAs</a> guide to learn more about efficient merging methods.</p></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline
	<span class="hljs-keyword">from</span> scheduling_tcd <span class="hljs-keyword">import</span> TCDScheduler

	device = <span class="hljs-string">"cuda"</span>
	base_model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>
	tcd_lora_id = <span class="hljs-string">"h1t/TCD-SDXL-LoRA"</span>
	styled_lora_id = <span class="hljs-string">"TheLastBen/Papercut_SDXL"</span>

	pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>).to(device)
	pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

	pipe.load_lora_weights(tcd_lora_id, adapter_name=<span class="hljs-string">"tcd"</span>)
	pipe.load_lora_weights(styled_lora_id, adapter_name=<span class="hljs-string">"style"</span>)
	pipe.set_adapters([<span class="hljs-string">"tcd"</span>, <span class="hljs-string">"style"</span>], adapter_weights=[<span class="hljs-number">1.0</span>, <span class="hljs-number">1.0</span>])

	prompt = <span class="hljs-string">"papercut of a winter mountain, snow"</span>

	image = pipe(
	prompt=prompt,
	num_inference_steps=<span class="hljs-number">4</span>,
	guidance_scale=<span class="hljs-number">0</span>,
	eta=<span class="hljs-number">0.3</span>,
	generator=torch.Generator(device=device).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-64b3tk"><img src="https://github.com/jabir-zheng/TCD/raw/main/assets/styled_lora.png"></p> <h2 class="relative group"><a id="adapters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#adapters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Adapters</span></h2> <p data-svelte-h="svelte-jvx8ap">TCD-LoRA is very versatile, and it can be combined with other adapter types like ControlNets, IP-Adapter, and AnimateDiff.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">ControlNet </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">IP-Adapter </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">AnimateDiff </div></div> <div class="language-select"> <h3 class="relative group"><a id="depth-controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#depth-controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Depth ControlNet</span></h3> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DPTImageProcessor, DPTForDepthEstimation
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ControlNetModel, StableDiffusionXLControlNetPipeline
	<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
	<span class="hljs-keyword">from</span> scheduling_tcd <span class="hljs-keyword">import</span> TCDScheduler

	device = <span class="hljs-string">"cuda"</span>
	depth_estimator = DPTForDepthEstimation.from_pretrained(<span class="hljs-string">"Intel/dpt-hybrid-midas"</span>).to(device)
	feature_extractor = DPTImageProcessor.from_pretrained(<span class="hljs-string">"Intel/dpt-hybrid-midas"</span>)

	<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_depth_map</span>(<span class="hljs-params">image</span>):
	image = feature_extractor(images=image, return_tensors=<span class="hljs-string">"pt"</span>).pixel_values.to(device)
	<span class="hljs-keyword">with</span> torch.no_grad(), torch.autocast(device):
	depth_map = depth_estimator(image).predicted_depth

	depth_map = torch.nn.functional.interpolate(
	depth_map.unsqueeze(<span class="hljs-number">1</span>),
	size=(<span class="hljs-number">1024</span>, <span class="hljs-number">1024</span>),
	mode=<span class="hljs-string">"bicubic"</span>,
	align_corners=<span class="hljs-literal">False</span>,
	)
	depth_min = torch.amin(depth_map, dim=[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>], keepdim=<span class="hljs-literal">True</span>)
	depth_max = torch.amax(depth_map, dim=[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>], keepdim=<span class="hljs-literal">True</span>)
	depth_map = (depth_map - depth_min) / (depth_max - depth_min)
	image = torch.cat([depth_map] * <span class="hljs-number">3</span>, dim=<span class="hljs-number">1</span>)

	image = image.permute(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">1</span>).cpu().numpy()[<span class="hljs-number">0</span>]
	image = Image.fromarray((image * <span class="hljs-number">255.0</span>).clip(<span class="hljs-number">0</span>, <span class="hljs-number">255</span>).astype(np.uint8))
	<span class="hljs-keyword">return</span> image

	base_model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>
	controlnet_id = <span class="hljs-string">"diffusers/controlnet-depth-sdxl-1.0"</span>
	tcd_lora_id = <span class="hljs-string">"h1t/TCD-SDXL-LoRA"</span>

	controlnet = ControlNetModel.from_pretrained(
	controlnet_id,
	torch_dtype=torch.float16,
	variant=<span class="hljs-string">"fp16"</span>,
	)
	pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
	base_model_id,
	controlnet=controlnet,
	torch_dtype=torch.float16,
	variant=<span class="hljs-string">"fp16"</span>,
	)
	pipe.enable_model_cpu_offload()

	pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

	pipe.load_lora_weights(tcd_lora_id)
	pipe.fuse_lora()

	prompt = <span class="hljs-string">"stormtrooper lecture, photorealistic"</span>

	image = load_image(<span class="hljs-string">"https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png"</span>)
	depth_image = get_depth_map(image)

	controlnet_conditioning_scale = <span class="hljs-number">0.5</span> <span class="hljs-comment"># recommended for good generalization</span>

	image = pipe(
	prompt,
	image=depth_image,
	num_inference_steps=<span class="hljs-number">4</span>,
	guidance_scale=<span class="hljs-number">0</span>,
	eta=<span class="hljs-number">0.3</span>,
	controlnet_conditioning_scale=controlnet_conditioning_scale,
	generator=torch.Generator(device=device).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]

	grid_image = make_image_grid([depth_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jfsnb4"><img src="https://github.com/jabir-zheng/TCD/raw/main/assets/controlnet_depth_tcd.png"></p> <h3 class="relative group"><a id="canny-controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#canny-controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Canny ControlNet</span></h3> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ControlNetModel, StableDiffusionXLControlNetPipeline
	<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid
	<span class="hljs-keyword">from</span> scheduling_tcd <span class="hljs-keyword">import</span> TCDScheduler

	device = <span class="hljs-string">"cuda"</span>
	base_model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>
	controlnet_id = <span class="hljs-string">"diffusers/controlnet-canny-sdxl-1.0"</span>
	tcd_lora_id = <span class="hljs-string">"h1t/TCD-SDXL-LoRA"</span>

	controlnet = ControlNetModel.from_pretrained(
	controlnet_id,
	torch_dtype=torch.float16,
	variant=<span class="hljs-string">"fp16"</span>,
	)
	pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
	base_model_id,
	controlnet=controlnet,
	torch_dtype=torch.float16,
	variant=<span class="hljs-string">"fp16"</span>,
	)
	pipe.enable_model_cpu_offload()

	pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

	pipe.load_lora_weights(tcd_lora_id)
	pipe.fuse_lora()

	prompt = <span class="hljs-string">"ultrarealistic shot of a furry blue bird"</span>

	canny_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"</span>)

	controlnet_conditioning_scale = <span class="hljs-number">0.5</span> <span class="hljs-comment"># recommended for good generalization</span>

	image = pipe(
	prompt,
	image=canny_image,
	num_inference_steps=<span class="hljs-number">4</span>,
	guidance_scale=<span class="hljs-number">0</span>,
	eta=<span class="hljs-number">0.3</span>,
	controlnet_conditioning_scale=controlnet_conditioning_scale,
	generator=torch.Generator(device=device).manual_seed(<span class="hljs-number">0</span>),
	).images[<span class="hljs-number">0</span>]

	grid_image = make_image_grid([canny_image, image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-x8em4a"><img src="https://github.com/jabir-zheng/TCD/raw/main/assets/controlnet_canny_tcd.png"></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">The inference parameters in this example might not work for all examples, so we recommend you to try different values for `num_inference_steps`, `guidance_scale`, `controlnet_conditioning_scale` and `cross_attention_kwargs` parameters and choose the best one.</div> </div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/using-diffusers/inference_with_tcd_lora.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_inhvqu = {
	assets: "/docs/diffusers/pr_10567/en",
	base: "/docs/diffusers/pr_10567/en",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/diffusers/pr_10567/en/_app/immutable/entry/start.5ab964f0.js"),
	import("/docs/diffusers/pr_10567/en/_app/immutable/entry/app.d83dbfce.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 245],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 38.2 kB
Xet hash:: f2690ec7d003bba3e1f761b9897394460a81a66275149cf0ef238e72f69e3ba4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.