Buckets:

hf-doc-build/doc-dev / diffusers /pr_11739 /zh /training /distributed_inference.html
rtrm's picture
download
raw
39.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;分布式推理&quot;,&quot;local&quot;:&quot;分布式推理&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🤗 Accelerate&quot;,&quot;local&quot;:&quot;-accelerate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch Distributed&quot;,&quot;local&quot;:&quot;pytorch-distributed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;模型分片&quot;,&quot;local&quot;:&quot;模型分片&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/diffusers/pr_11739/zh/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/entry/start.95a8faef.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/scheduler.e4ff9b64.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/singletons.0a6f1d19.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/index.f9be34a7.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/paths.37f6e25a.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/entry/app.a988cdaf.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/preload-helper.3e2c3f46.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/index.09f1bca0.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/nodes/0.0ec3fec6.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/nodes/43.67ecf388.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.f5199cd9.js">
<link rel="modulepreload" href="/docs/diffusers/pr_11739/zh/_app/immutable/chunks/CodeBlock.1680a1fd.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;分布式推理&quot;,&quot;local&quot;:&quot;分布式推理&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🤗 Accelerate&quot;,&quot;local&quot;:&quot;-accelerate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch Distributed&quot;,&quot;local&quot;:&quot;pytorch-distributed&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;模型分片&quot;,&quot;local&quot;:&quot;模型分片&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="分布式推理" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#分布式推理"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>分布式推理</span></h1> <p data-svelte-h="svelte-164t5yx">在分布式设置中,您可以使用 🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a><a href="https://pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">PyTorch Distributed</a> 在多个 GPU 上运行推理,这对于并行生成多个提示非常有用。</p> <p data-svelte-h="svelte-qajcxl">本指南将向您展示如何使用 🤗 Accelerate 和 PyTorch Distributed 进行分布式推理。</p> <h2 class="relative group"><a id="-accelerate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-accelerate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🤗 Accelerate</span></h2> <p data-svelte-h="svelte-10vyc10">🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 是一个旨在简化在分布式设置中训练或运行推理的库。它简化了设置分布式环境的过程,让您可以专注于您的 PyTorch 代码。</p> <p data-svelte-h="svelte-k495sb">首先,创建一个 Python 文件并初始化一个 <code>accelerate.PartialState</code> 来创建分布式环境;您的设置会自动检测,因此您无需明确定义 <code>rank</code><code>world_size</code>。将 <code>DiffusionPipeline</code> 移动到 <code>distributed_state.device</code> 以为每个进程分配一个 GPU。</p> <p data-svelte-h="svelte-1224dwq">现在使用 <code>split_between_processes</code> 实用程序作为上下文管理器,自动在进程数之间分发提示。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipeline = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)
distributed_state = PartialState()
pipeline.to(distributed_state.device)
<span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">&quot;a dog&quot;</span>, <span class="hljs-string">&quot;a cat&quot;</span>]) <span class="hljs-keyword">as</span> prompt:
result = pipeline(prompt).images[<span class="hljs-number">0</span>]
result.save(<span class="hljs-string">f&quot;result_<span class="hljs-subst">{distributed_state.process_index}</span>.png&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1eq5z11">使用 <code>--num_processes</code> 参数指定要使用的 GPU 数量,并调用 <code>accelerate launch</code> 来运行脚本:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch run_distributed.py --num_processes=2<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-87d30k"><p>参考这个最小示例 <a href="https://gist.github.com/sayakpaul/cfaebd221820d7b43fae638b4dfa01ba" rel="nofollow">脚本</a> 以在多个 GPU 上运行推理。要了解更多信息,请查看 <a href="https://huggingface.co/docs/accelerate/en/usage_guides/distributed_inference#distributed-inference-with-accelerate" rel="nofollow">使用 🤗 Accelerate 进行分布式推理</a> 指南。</p></blockquote> <h2 class="relative group"><a id="pytorch-distributed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pytorch-distributed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PyTorch Distributed</span></h2> <p data-svelte-h="svelte-7gn49b">PyTorch 支持 <a href="https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html" rel="nofollow"><code>DistributedDataParallel</code></a>,它启用了数据
并行性。</p> <p data-svelte-h="svelte-1vhpcpe">首先,创建一个 Python 文件并导入 <code>torch.distributed</code><code>torch.multiprocessing</code> 来设置分布式进程组,并为每个 GPU 上的推理生成进程。您还应该初始化一个 <code>DiffusionPipeline</code></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist
<span class="hljs-keyword">import</span> torch.multiprocessing <span class="hljs-keyword">as</span> mp
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
sd = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-109hrfe">您需要创建一个函数来运行推理;<a href="https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group" rel="nofollow"><code>init_process_group</code></a> 处理创建一个分布式环境,指定要使用的后端类型、当前进程的 <code>rank</code> 以及参与进程的数量 <code>world_size</code>。如果您在 2 个 GPU 上并行运行推理,那么 <code>world_size</code> 就是 2。</p> <p data-svelte-h="svelte-1qcsr3i"><code>DiffusionPipeline</code> 移动到 <code>rank</code>,并使用 <code>get_rank</code> 为每个进程分配一个 GPU,其中每个进程处理不同的提示:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>):
dist.init_process_group(<span class="hljs-string">&quot;nccl&quot;</span>, rank=rank, world_size=world_size)
sd.to(rank)
<span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>:
prompt = <span class="hljs-string">&quot;a dog&quot;</span>
<span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>:
prompt = <span class="hljs-string">&quot;a cat&quot;</span>
image = sd(prompt).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">f&quot;./<span class="hljs-subst">{<span class="hljs-string">&#x27;_&#x27;</span>.join(prompt)}</span>.png&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1yf0k86">要运行分布式推理,调用 <a href="https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn" rel="nofollow"><code>mp.spawn</code></a><code>world_size</code> 定义的 GPU 数量上运行 <code>run_inference</code> 函数:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
world_size = <span class="hljs-number">2</span>
mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=<span class="hljs-literal">True</span>)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
main()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ebzc0p">完成推理脚本后,使用 <code>--nproc_per_node</code> 参数指定要使用的 GPU 数量,并调用 <code>torchrun</code> 来运行脚本:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->torchrun run_distributed.py --nproc_per_node=2<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-zkfe1n"><p>您可以在 <code>DiffusionPipeline</code> 中使用 <code>device_map</code> 将其模型级组件分布在多个设备上。请参考 <a href="../tutorials/inference_with_big_models#device-placement">设备放置</a> 指南了解更多信息。</p></blockquote> <h2 class="relative group"><a id="模型分片" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#模型分片"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>模型分片</span></h2> <p data-svelte-h="svelte-yu6o1n">现代扩散系统,如 <a href="../api/pipelines/flux">Flux</a>,非常大且包含多个模型。例如,<a href="https://hf.co/black-forest-labs/FLUX.1-dev" rel="nofollow">Flux.1-Dev</a> 由两个文本编码器 - <a href="https://hf.co/google/t5-v1_1-xxl" rel="nofollow">T5-XXL</a><a href="https://hf.co/openai/clip-vit-large-patch14" rel="nofollow">CLIP-L</a> - 一个 <a href="../api/models/flux_transformer">扩散变换器</a>,以及一个 <a href="../api/models/autoencoderkl">VAE</a> 组成。对于如此大的模型,在消费级 GPU 上运行推理可能具有挑战性。</p> <p data-svelte-h="svelte-1wtcutz">模型分片是一种技术,当模型无法容纳在单个 GPU 上时,将模型分布在多个 GPU 上。下面的示例假设有两个 16GB GPU 可用于推理。</p> <p data-svelte-h="svelte-vjsvbo">开始使用文本编码器计算文本嵌入。通过设置 <code>device_map=&quot;balanced&quot;</code> 将文本编码器保持在两个GPU上。<code>balanced</code> 策略将模型均匀分布在所有可用GPU上。使用 <code>max_memory</code> 参数为每个GPU上的每个文本编码器分配最大内存量。</p> <blockquote class="tip" data-svelte-h="svelte-zyi0qz"><p><strong></strong> 在此步骤加载文本编码器!扩散变换器和VAE在后续步骤中加载以节省内存。</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">import</span> torch
prompt = <span class="hljs-string">&quot;a photo of a dog with cat-like look&quot;</span>
pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
transformer=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
device_map=<span class="hljs-string">&quot;balanced&quot;</span>,
max_memory={<span class="hljs-number">0</span>: <span class="hljs-string">&quot;16GB&quot;</span>, <span class="hljs-number">1</span>: <span class="hljs-string">&quot;16GB&quot;</span>},
torch_dtype=torch.bfloat16
)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Encoding prompts.&quot;</span>)
prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
prompt=prompt, prompt_2=<span class="hljs-literal">None</span>, max_sequence_length=<span class="hljs-number">512</span>
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-101ikwk">一旦文本嵌入计算完成,从GPU中移除它们以为扩散变换器腾出空间。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> gc
<span class="hljs-keyword">def</span> <span class="hljs-title function_">flush</span>():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
<span class="hljs-keyword">del</span> pipeline.text_encoder
<span class="hljs-keyword">del</span> pipeline.text_encoder_2
<span class="hljs-keyword">del</span> pipeline.tokenizer
<span class="hljs-keyword">del</span> pipeline.tokenizer_2
<span class="hljs-keyword">del</span> pipeline
flush()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-i97tc3">接下来加载扩散变换器,它有125亿参数。这次,设置 <code>device_map=&quot;auto&quot;</code> 以自动将模型分布在两个16GB GPU上。<code>auto</code> 策略由 <a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 支持,并作为 <a href="https://hf.co/docs/accelerate/concept_guides/big_model_inference" rel="nofollow">大模型推理</a> 功能的一部分可用。它首先将模型分布在最快的设备(GPU)上,然后在需要时移动到较慢的设备如CPU和硬盘。将模型参数存储在较慢设备上的权衡是推理延迟较慢。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel
<span class="hljs-keyword">import</span> torch
transformer = AutoModel.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
subfolder=<span class="hljs-string">&quot;transformer&quot;</span>,
device_map=<span class="hljs-string">&quot;auto&quot;</span>,
torch_dtype=torch.bfloat16
)<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-1l0lzn4"><p>在任何时候,您可以尝试 <code>print(pipeline.hf_device_map)</code> 来查看各种模型如何在设备上分布。这对于跟踪模型的设备放置很有用。您也可以尝试 <code>print(transformer.hf_device_map)</code> 来查看变换器模型如何在设备上分片。</p></blockquote> <p data-svelte-h="svelte-1lgsyuc">将变换器模型添加到管道中以进行去噪,但将其他模型级组件如文本编码器和VAE设置为 <code>None</code>,因为您还不需要它们。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
text_encoder=<span class="hljs-literal">None</span>,
text_encoder_2=<span class="hljs-literal">None</span>,
tokenizer=<span class="hljs-literal">None</span>,
tokenizer_2=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
transformer=transformer,
torch_dtype=torch.bfloat16
)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Running denoising.&quot;</span>)
height, width = <span class="hljs-number">768</span>, <span class="hljs-number">1360</span>
latents = pipeline(
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
num_inference_steps=<span class="hljs-number">50</span>,
guidance_scale=<span class="hljs-number">3.5</span>,
height=height,
width=width,
output_type=<span class="hljs-string">&quot;latent&quot;</span>,
).images<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-x0nor">从内存中移除管道和变换器,因为它们不再需要。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">del</span> pipeline.transformer
<span class="hljs-keyword">del</span> pipeline
flush()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1v7ewyt">最后,使用变分自编码器(VAE)将潜在表示解码为图像。VAE通常足够小,可以在单个GPU上加载。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKL
<span class="hljs-keyword">from</span> diffusers.image_processor <span class="hljs-keyword">import</span> VaeImageProcessor
<span class="hljs-keyword">import</span> torch
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder=<span class="hljs-string">&quot;vae&quot;</span>, torch_dtype=torch.bfloat16).to(<span class="hljs-string">&quot;cuda&quot;</span>)
vae_scale_factor = <span class="hljs-number">2</span> ** (<span class="hljs-built_in">len</span>(vae.config.block_out_channels) - <span class="hljs-number">1</span>)
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;运行解码中。&quot;</span>)
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
image = vae.decode(latents, return_dict=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>]
image = image_processor.postprocess(image, output_type=<span class="hljs-string">&quot;pil&quot;</span>)
image[<span class="hljs-number">0</span>].save(<span class="hljs-string">&quot;split_transformer.png&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19rooo6">通过选择性加载和卸载在特定阶段所需的模型,并将最大模型分片到多个GPU上,可以在消费级GPU上运行大型模型的推理。</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/zh/training/distributed_inference.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_pa8cr6 = {
assets: "/docs/diffusers/pr_11739/zh",
base: "/docs/diffusers/pr_11739/zh",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/diffusers/pr_11739/zh/_app/immutable/entry/start.95a8faef.js"),
import("/docs/diffusers/pr_11739/zh/_app/immutable/entry/app.a988cdaf.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 43],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
39.9 kB
·
Xet hash:
932d2bd8d4e3457283f5a220573fae8178f3d8dabc3b248df7c1bbcc0f1446e9

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.