Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / diffusers /pr_12509 /en /optimization /attention_backends.html

rtrm

4 months ago

download

raw

31.6 kB

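	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Attention backends&quot;,&quot;local&quot;:&quot;attention-backends&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;set_attention_backend&quot;,&quot;local&quot;:&quot;setattentionbackend&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;attention_backend context manager&quot;,&quot;local&quot;:&quot;attentionbackend-context-manager&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Checks&quot;,&quot;local&quot;:&quot;checks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Available backends&quot;,&quot;local&quot;:&quot;available-backends&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">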
	<link href="/docs/diffusers/pr_12509/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/entry/start.10f036de.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/scheduler.53228c21.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/singletons.2e0ca52e.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/index.e93d0901.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/paths.bf32c3ea.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/entry/app.1d7e872b.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/preload-helper.19f22cee.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/index.100fac89.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/nodes/0.71bf2f16.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/nodes/245.0bb14ceb.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/CopyLLMTxtMenu.c36f1912.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/globals.7f7f1b26.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/IconCopy.38cf8f56.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.c6997d0b.js">
	<link rel="modulepreload" href="/docs/diffusers/pr_12509/en/_app/immutable/chunks/CodeBlock.d30a6509.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Attention backends","local":"attention-backends","sections":[{"title":"set_attention_backend","local":"setattentionbackend","sections":[],"depth":2},{"title":"attention_backend context manager","local":"attentionbackend-context-manager","sections":[],"depth":2},{"title":"Checks","local":"checks","sections":[],"depth":2},{"title":"Available backends","local":"available-backends","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 
hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="attention-backends" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#attention-backends"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Attention backends</span></h1> <blockquote class="note" data-svelte-h="svelte-7ay7vy"><p>The attention dispatcher is an experimental feature. 
Please open an issue if you have any feedback or encounter any problems.</p></blockquote> <p data-svelte-h="svelte-1p5wykf">Diffusers provides several optimized attention algorithms that are more memory and computationally efficient through its <em>attention dispatcher</em>. The dispatcher acts as a router for managing and switching between different attention implementations and provides a unified interface for interacting with them.</p> <p data-svelte-h="svelte-q2vvjg">Refer to the table below for an overview of the available attention families and to the <a href="#available-backends">Available backends</a> section for a more complete list.</p> <table data-svelte-h="svelte-1eo6jrm"><thead><tr><th>attention family</th> <th>main feature</th></tr></thead> <tbody><tr><td>FlashAttention</td> <td>minimizes memory reads/writes through tiling and recomputation</td></tr> <tr><td>SageAttention</td> <td>quantizes attention to int8</td></tr> <tr><td>PyTorch native</td> <td>built-in PyTorch implementation using <a href="./fp16#scaled-dot-product-attention">scaled_dot_product_attention</a></td></tr> <tr><td>xFormers</td> <td>memory-efficient attention with support for various attention kernels</td></tr></tbody></table> <p data-svelte-h="svelte-1o85kpy">This guide will show you how to set and use the different attention backends.</p> <h2 class="relative group"><a id="setattentionbackend" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#setattentionbackend"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>set_attention_backend</span></h2> <p data-svelte-h="svelte-180toyd">The <a href="/docs/diffusers/pr_12509/en/api/models/overview#diffusers.ModelMixin.set_attention_backend">set_attention_backend()</a> method iterates through all the modules in the model and sets the appropriate attention backend to use. The attention backend setting persists until <a href="/docs/diffusers/pr_12509/en/api/models/overview#diffusers.ModelMixin.reset_attention_backend">reset_attention_backend()</a> is called.</p> <p data-svelte-h="svelte-142if5b">The example below demonstrates how to enable the <code>_flash_3_hub</code> implementation for FlashAttention-3 from the <a href="https://github.com/huggingface/kernels" rel="nofollow">kernels</a> library, which allows you to instantly use optimized compute kernels from the Hub without requiring any setup.</p> <blockquote class="note" data-svelte-h="svelte-j6ruup"><p>FlashAttention-3 is not supported for non-Hopper architectures, in which case, use FlashAttention with <code>set_attention_backend("flash")</code>.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> QwenImagePipeline

	pipeline = QwenImagePipeline.from_pretrained(
	    <span class="hljs-string">"Qwen/Qwen-Image"</span>, torch_dtype=torch.bfloat16, device_map=<span class="hljs-string">"cuda"</span>
	)
	pipeline.transformer.set_attention_backend(<span class="hljs-string">"_flash_3_hub"</span>)

	prompt = <span class="hljs-string">"""
	cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
	highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
	"""</span>
	pipeline(prompt).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-161gks8">To restore the default attention backend, call <a href="/docs/diffusers/pr_12509/en/api/models/overview#diffusers.ModelMixin.reset_attention_backend">reset_attention_backend()</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline.transformer.reset_attention_backend()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="attentionbackend-context-manager" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#attentionbackend-context-manager"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>attention_backend context manager</span></h2> <p data-svelte-h="svelte-g6fiju">The <a href="https://github.com/huggingface/diffusers/blob/5e181eddfe7e44c1444a2511b0d8e21d177850a0/src/diffusers/models/attention_dispatch.py#L225" rel="nofollow">attention_backend</a> context manager temporarily sets an attention backend for a model within the context. Outside the context, the default attention (PyTorch’s native scaled dot product attention) is used. 
This is useful if you want to use different backends for different parts of a pipeline or if you want to test the different backends.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
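	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> QwenImagePipeline
	<span class="hljs-keyword">from</span> diffusers.models.attention_dispatch <span class="hljs-keyword">import</span> attention_backend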

	pipeline = QwenImagePipeline.from_pretrained(
	    <span class="hljs-string">"Qwen/Qwen-Image"</span>, torch_dtype=torch.bfloat16, device_map=<span class="hljs-string">"cuda"</span>
	)
	prompt = <span class="hljs-string">"""
	cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
	highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
	"""</span>

	<span class="hljs-keyword">with</span> attention_backend(<span class="hljs-string">"_flash_3_hub"</span>):
	    image = pipeline(prompt).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-1mkj6ep"><p>Most attention backends support <code>torch.compile</code> without graph breaks and can be used to further speed up inference.</p></blockquote> <h2 class="relative group"><a id="checks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#checks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Checks</span></h2> <p data-svelte-h="svelte-fodsmc">The attention dispatcher includes debugging checks that catch common errors before they cause problems.</p> <ol data-svelte-h="svelte-1ylz7pk"><li>Device checks verify that query, key, and value tensors live on the same device.</li> <li>Data type checks confirm tensors have matching dtypes and use either bfloat16 or float16.</li> <li>Shape checks validate tensor dimensions and prevent mixing attention masks with causal flags.</li></ol> <p data-svelte-h="svelte-nw9sbr">Enable these checks by setting the <code>DIFFUSERS_ATTN_CHECKS</code> environment variable. 
Checks add overhead to every attention operation, so they’re disabled by default.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> DIFFUSERS_ATTN_CHECKS=<span class="hljs-built_in">yes</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9nu3ec">The checks are run now before every attention operation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 
0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch

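	<span class="hljs-keyword">from</span> diffusers.models.attention_dispatch <span class="hljs-keyword">import</span> attention_backend, dispatch_attention_fn

	query = torch.randn(<span class="hljs-number">1</span>, <span class="hljs-number">10</span>, <span class="hljs-number">8</span>, <span class="hljs-number">64</span>, dtype=torch.bfloat16, device=<span class="hljs-string">"cuda"</span>)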
	key = torch.randn(<span class="hljs-number">1</span>, <span class="hljs-number">10</span>, <span class="hljs-number">8</span>, <span class="hljs-number">64</span>, dtype=torch.bfloat16, device=<span class="hljs-string">"cuda"</span>)
	value = torch.randn(<span class="hljs-number">1</span>, <span class="hljs-number">10</span>, <span class="hljs-number">8</span>, <span class="hljs-number">64</span>, dtype=torch.bfloat16, device=<span class="hljs-string">"cuda"</span>)

	<span class="hljs-keyword">try</span>:
	    <span class="hljs-keyword">with</span> attention_backend(<span class="hljs-string">"flash"</span>):
	        output = dispatch_attention_fn(query, key, value)
	        <span class="hljs-built_in">print</span>(<span class="hljs-string">"✓ Flash Attention works with checks enabled"</span>)
	<span class="hljs-keyword">except</span> Exception <span class="hljs-keyword">as</span> e:
	    <span class="hljs-built_in">print</span>(<span class="hljs-string">f"✗ Flash Attention failed: <span class="hljs-subst">{e}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f6wmbe">You can also configure the registry directly.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.models.attention_dispatch <span class="hljs-keyword">import</span> _AttentionBackendRegistry

	_AttentionBackendRegistry._checks_enabled = <span class="hljs-literal">True</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="available-backends" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#available-backends"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Available backends</span></h2> <p data-svelte-h="svelte-frsh1o">Refer to the table below for a complete list of available attention backends and their variants.</p> <details data-svelte-h="svelte-1a11wal"><summary>Expand</summary> <table><thead><tr><th>Backend Name</th> <th>Family</th> <th>Description</th></tr></thead> <tbody><tr><td><code>native</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>Default backend using PyTorch’s scaled_dot_product_attention</td></tr> <tr><td><code>flex</code></td> <td><a href="https://docs.pytorch.org/docs/stable/nn.attention.flex_attention.html#module-torch.nn.attention.flex_attention" rel="nofollow">FlexAttention</a></td> <td>PyTorch FlexAttention implementation</td></tr> 
<tr><td><code>_native_cudnn</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>CuDNN-optimized attention</td></tr> <tr><td><code>_native_efficient</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>Memory-efficient attention</td></tr> <tr><td><code>_native_flash</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>PyTorch’s FlashAttention</td></tr> <tr><td><code>_native_math</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>Math-based attention (fallback)</td></tr> <tr><td><code>_native_npu</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>NPU-optimized attention</td></tr> <tr><td><code>_native_xla</code></td> <td><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend" rel="nofollow">PyTorch native</a></td> <td>XLA-optimized attention</td></tr> <tr><td><code>flash</code></td> <td><a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a></td> <td>FlashAttention-2</td></tr> <tr><td><code>flash_varlen</code></td> <td><a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a></td> <td>Variable length FlashAttention</td></tr> <tr><td><code>_flash_3</code></td> <td><a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a></td> <td>FlashAttention-3</td></tr> 
<tr><td><code>_flash_varlen_3</code></td> <td><a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a></td> <td>Variable length FlashAttention-3</td></tr> <tr><td><code>_flash_3_hub</code></td> <td><a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">FlashAttention</a></td> <td>FlashAttention-3 from kernels</td></tr> <tr><td><code>sage</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>Quantized attention (INT8 QK)</td></tr> <tr><td><code>sage_varlen</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>Variable length SageAttention</td></tr> <tr><td><code>_sage_qk_int8_pv_fp8_cuda</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>INT8 QK + FP8 PV (CUDA)</td></tr> <tr><td><code>_sage_qk_int8_pv_fp8_cuda_sm90</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>INT8 QK + FP8 PV (SM90)</td></tr> <tr><td><code>_sage_qk_int8_pv_fp16_cuda</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>INT8 QK + FP16 PV (CUDA)</td></tr> <tr><td><code>_sage_qk_int8_pv_fp16_triton</code></td> <td><a href="https://github.com/thu-ml/SageAttention" rel="nofollow">SageAttention</a></td> <td>INT8 QK + FP16 PV (Triton)</td></tr> <tr><td><code>xformers</code></td> <td><a href="https://github.com/facebookresearch/xformers" rel="nofollow">xFormers</a></td> <td>Memory-efficient attention</td></tr></tbody></table></details> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/optimization/attention_backends.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
		// Client bootstrap for this prerendered page: publish the path
		// configuration on a global, then load the two entry modules and
		// start the app on the element that contains this <script> tag.
		__sveltekit_1tmfylu = {
			assets: "/docs/diffusers/pr_12509/en",
			base: "/docs/diffusers/pr_12509/en",
			env: {}
		};

		// Read synchronously, before any promise callback runs:
		// document.currentScript is only set while this script is executing.
		const element = document.currentScript.parentElement;

		// NOTE(review): presumably one data slot per entry in node_ids — confirm.
		const data = [null, null];

		// Both dynamic imports are kicked off immediately, in this order.
		const entryModules = [
			import("/docs/diffusers/pr_12509/en/_app/immutable/entry/start.10f036de.js"),
			import("/docs/diffusers/pr_12509/en/_app/immutable/entry/app.1d7e872b.js")
		];

		Promise.all(entryModules).then((loaded) => {
			const kit = loaded[0];
			const app = loaded[1];
			const startOptions = {
				node_ids: [0, 245],
				data: data,
				form: null,
				error: null
			};
			kit.start(app, element, startOptions);
		});
	}
	</script>

Xet Storage Details

Size: 31.6 kB
Xet hash: 6240f6007df38d2d31c1204d50ecaafaab301d77d23e0fddf4984be08932237d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.