Buckets:

rtrm's picture
download
raw
25.3 kB
import{s as Ot,o as Dt,n as Pt}from"../chunks/scheduler.e4ff9b64.js";import{S as Kt,i as tl,e as m,s as p,c as T,h as ll,a as d,d as e,b as o,f as zt,g as h,j as u,k as O,l as el,m as s,n as M,t as f,o as V,p as j}from"../chunks/index.09f1bca0.js";import{C as sl,H as K,E as al}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.dc35afd8.js";import{C as tt}from"../chunks/CodeBlock.7e1077c6.js";import{H as nl,a as At}from"../chunks/HfOption.44827c7f.js";function il(y){let i,J="我们可以通过使用<code>EvaluationAgent</code>加载和评估优化后的模型,并将其传递给<code>Task</code>。",c,r,n;return r=new tt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQWZyb20lMjBwcnVuYS5kYXRhLnBydW5hX2RhdGFtb2R1bGUlMjBpbXBvcnQlMjBQcnVuYURhdGFNb2R1bGUlMEFmcm9tJTIwcHJ1bmEuZXZhbHVhdGlvbi5ldmFsdWF0aW9uX2FnZW50JTIwaW1wb3J0JTIwRXZhbHVhdGlvbkFnZW50JTBBZnJvbSUyMHBydW5hLmV2YWx1YXRpb24ubWV0cmljcyUyMGltcG9ydCUyMCglMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyJTJDJTBBJTIwJTIwJTIwJTIwVG90YWxUaW1lTWV0cmljJTJDJTBBKSUwQWZyb20lMjBwcnVuYS5ldmFsdWF0aW9uLnRhc2slMjBpbXBvcnQlMjBUYXNrJTBBJTBBJTIzJTIwZGVmaW5lJTIwdGhlJTIwZGV2aWNlJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUyMGlmJTIwdG9yY2guY3VkYS5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJtcHMlMjIlMjBpZiUyMHRvcmNoLmJhY2tlbmRzLm1wcy5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJjcHUlMjIlMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTUlMjBQcnVuYUFJJTJGU2VnbWluZC1WZWdhLXNtYXNoZWQlMjAlRTYlODglOTYlMjBQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbC5mcm9tX2h1YiglMjJQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNiU4QyU4NyVFNiVBMCU4NyUwQW1ldHJpY3MlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjBUb3RhbFRpbWVNZXRyaWMobl9pdGVyYXRpb25zJTNEMjAlMkMlMjBuX3dhcm11cF9pdGVyYXRpb25zJTNENSklMkMlMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljKG5faXRlcmF0aW9ucyUzRDIwJTJDJTIwbl93YXJtdXBfaXRlcmF0aW9ucyUzRDUpJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyKCUyMmNsaXAlMjIpJTJDJTBBJTVEJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU2JTk1JUIwJUU2JThEJUFFJUU2JUE4JUExJUU1JTlEJTk3JTBBZGF0YW1vZHVsZSUyMCUzRCUyMFBydW5hRGF0YU1vZHVsZS5mcm9tX3N0cmluZyglMjJMQUlPTjI1NiUyMiklMEFkYXRhbW9kdWxlLmxpbWl0X2RhdGFzZXRzKDEwKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNCVCQiVCQiVFNSU4QSVBMSVFNSU5MiU4QyVFOCVBRiU4NCVFNCVCQyVCMCVFNCVCQiVBMyVFNyU5MCU4NiUwQXRhc2slMjAlM0QlMjBUYXNrKG1ldHJpY3MlMkMlMjBkYXRhbW9kdWxlJTNEZGF0YW1vZHVsZSUyQyUyMGRldmljZSUzRGRldmljZSklMEFldmFsX2FnZW50JTIwJTNEJTIwRXZhbHVhdGlvbkFnZW50KHRhc2spJTBBJTBBJTIzJTIwJUU4JUFGJTg0JUU0JUJDJUIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJUU1JUI5JUI2JUU1JThEJUI4JUU4JUJEJUJEJUU1JTg4JUIwQ1BVJTBBc21hc2hlZF9waXBlLm1vdmVfdG9fZGV2aWNlKGRldmljZSklMEFzbWFzaGVkX3BpcGVfcmVzdWx0cyUyMCUzRCUyMGV2YWxfYWdlbnQuZXZhbHVhdGUoc21hc2hlZF9waXBlKSUwQXNtYXNoZWRfcGlwZS5tb3ZlX3RvX2RldmljZSglMjJjcHUlMjIp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel
<span class="hljs-keyword">from</span> pruna.data.pruna_datamodule <span class="hljs-keyword">import</span> PrunaDataModule
<span class="hljs-keyword">from</span> pruna.evaluation.evaluation_agent <span class="hljs-keyword">import</span> EvaluationAgent
<span class="hljs-keyword">from</span> pruna.evaluation.metrics <span class="hljs-keyword">import</span> (
ThroughputMetric,
TorchMetricWrapper,
TotalTimeMetric,
)
<span class="hljs-keyword">from</span> pruna.evaluation.task <span class="hljs-keyword">import</span> Task
<span class="hljs-comment"># define the device</span>
device = <span class="hljs-string">&quot;cuda&quot;</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">&quot;mps&quot;</span> <span class="hljs-keyword">if</span> torch.backends.mps.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">&quot;cpu&quot;</span>
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span>
smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">&quot;PrunaAI/FLUX.1-dev-smashed&quot;</span>)
<span class="hljs-comment"># 定义指标</span>
metrics = [
TotalTimeMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>),
ThroughputMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>),
TorchMetricWrapper(<span class="hljs-string">&quot;clip&quot;</span>),
]
<span class="hljs-comment"># 定义数据模块</span>
datamodule = PrunaDataModule.from_string(<span class="hljs-string">&quot;LAION256&quot;</span>)
datamodule.limit_datasets(<span class="hljs-number">10</span>)
<span class="hljs-comment"># 定义任务和评估代理</span>
task = Task(metrics, datamodule=datamodule, device=device)
eval_agent = EvaluationAgent(task)
<span class="hljs-comment"># 评估优化模型并卸载到CPU</span>
smashed_pipe.move_to_device(device)
smashed_pipe_results = eval_agent.evaluate(smashed_pipe)
smashed_pipe.move_to_device(<span class="hljs-string">&quot;cpu&quot;</span>)`,wrap:!1}}),{c(){i=m("p"),i.innerHTML=J,c=p(),T(r.$$.fragment)},l(a){i=d(a,"P",{"data-svelte-h":!0}),u(i)!=="svelte-1wfu4ax"&&(i.innerHTML=J),c=o(a),h(r.$$.fragment,a)},m(a,U){s(a,i,U),s(a,c,U),M(r,a,U),n=!0},p:Pt,i(a){n||(f(r.$$.fragment,a),n=!0)},o(a){V(r.$$.fragment,a),n=!1},d(a){a&&(e(i),e(c)),j(r,a)}}}function pl(y){let i,J="除了比较优化模型与基础模型,您还可以评估独立的 <code>diffusers</code> 模型。这在您想评估模型性能而不考虑优化时非常有用。我们可以通过使用 <code>PrunaModel</code> 包装器并运行 <code>EvaluationAgent</code> 来实现。",c,r,n;return r=new tt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQSUyMyUyMCVFNCVCRCVCRiVFNyU5NCVBOCVFNSVCMCU4RkdQVSVFNSU4NiU4NSVFNSVBRCU5OCVFNSVCMCU5RCVFOCVBRiU5NSUyMFBydW5hQUklMkZTZWdtaW5kLVZlZ2Etc21hc2hlZCUyMCVFNiU4OCU5NiUyMFBydW5hQUklMkZGTFVYLjEtZGV2LXNtYXNoZWQlMEFwaXBlJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKS50byglMjJjcHUlMjIpJTBBd3JhcHBlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbChtb2RlbCUzRHBpcGUp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span>
pipe = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
torch_dtype=torch.bfloat16
).to(<span class="hljs-string">&quot;cpu&quot;</span>)
wrapped_pipe = PrunaModel(model=pipe)`,wrap:!1}}),{c(){i=m("p"),i.innerHTML=J,c=p(),T(r.$$.fragment)},l(a){i=d(a,"P",{"data-svelte-h":!0}),u(i)!=="svelte-im1gl4"&&(i.innerHTML=J),c=o(a),h(r.$$.fragment,a)},m(a,U){s(a,i,U),s(a,c,U),M(r,a,U),n=!0},p:Pt,i(a){n||(f(r.$$.fragment,a),n=!0)},o(a){V(r.$$.fragment,a),n=!1},d(a){a&&(e(i),e(c)),j(r,a)}}}function ol(y){let i,J,c,r;return i=new At({props:{id:"eval",option:"optimized model",$$slots:{default:[il]},$$scope:{ctx:y}}}),c=new At({props:{id:"eval",option:"standalone model",$$slots:{default:[pl]},$$scope:{ctx:y}}}),{c(){T(i.$$.fragment),J=p(),T(c.$$.fragment)},l(n){h(i.$$.fragment,n),J=o(n),h(c.$$.fragment,n)},m(n,a){M(i,n,a),s(n,J,a),M(c,n,a),r=!0},p(n,a){const U={};a&2&&(U.$$scope={dirty:a,ctx:n}),i.$set(U);const N={};a&2&&(N.$$scope={dirty:a,ctx:n}),c.$set(N)},i(n){r||(f(i.$$.fragment,n),f(c.$$.fragment,n),r=!0)},o(n){V(i.$$.fragment,n),V(c.$$.fragment,n),r=!1},d(n){n&&e(J),j(i,n),j(c,n)}}}function rl(y){let i,J,c,r,n,a,U,N,I,Nt='<a href="https://github.com/PrunaAI/pruna" rel="nofollow">Pruna</a> 是一个模型优化框架,提供多种优化方法——量化、剪枝、缓存、编译——以加速推理并减少内存使用。以下是优化方法的概览。',lt,$,It='<thead><tr><th>技术</th> <th>描述</th> <th align="center">速度</th> <th align="center">内存</th> <th align="center">质量</th></tr></thead> <tbody><tr><td><code>batcher</code></td> <td>将多个输入分组在一起同时处理,提高计算效率并减少处理时间。</td> <td align="center">✅</td> <td align="center">❌</td> <td align="center">➖</td></tr> <tr><td><code>cacher</code></td> <td>存储计算的中间结果以加速后续操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>compiler</code></td> <td>为特定硬件优化模型指令。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>distiller</code></td> <td>训练一个更小、更简单的模型来模仿一个更大、更复杂的模型。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>quantizer</code></td> <td>降低权重和激活的精度,减少内存需求。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>pruner</code></td> <td>移除不重要或冗余的连接和神经元,产生一个更稀疏、更高效的网络。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>recoverer</code></td> <td>在压缩后恢复模型的性能。</td> <td align="center">➖</td> <td align="center">➖</td> <td align="center">✅</td></tr> <tr><td><code>factorizer</code></td> <td>将多个小矩阵乘法批处理为一个大型融合操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>enhancer</code></td> <td>通过应用后处理算法(如去噪或上采样)来增强模型输出。</td> <td align="center">❌</td> <td align="center">-</td> <td align="center">✅</td></tr></tbody>',et,Q,$t="✅ (改进), ➖ (大致相同), ❌ (恶化)",st,R,Qt='在 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 文档</a> 中探索所有优化方法。',at,Z,nt,v,Rt="使用以下命令安装 Pruna。",it,W,pt,S,ot,_,Zt="Diffusers 模型支持广泛的优化算法,如下所示。",rt,w,vt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/diffusers_combinations.png" alt="Diffusers 模型支持的优化算法概览"/>',ct,E,Wt='下面的示例使用 factorizer、compiler 和 cacher 算法的组合优化 <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" rel="nofollow">black-forest-labs/FLUX.1-dev</a>。这种组合将推理速度加速高达 4.2 倍,并将峰值 GPU 内存使用从 34.7GB 减少到 28.0GB,同时几乎保持相同的输出质量。',mt,C,St=`<p>参考 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html" rel="nofollow">Pruna 优化</a> 文档以了解更多关于该操作的信息。
本示例中使用的优化技术。</p>`,dt,g,_t='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_combination.png" alt="用于FLUX.1-dev的优化技术展示,结合了因子分解器、编译器和缓存器算法"/>',ut,k,Et="首先定义一个包含要使用的优化算法的<code>SmashConfig</code>。要优化模型,将管道和<code>SmashConfig</code>用<code>smash</code>包装,然后像往常一样使用管道进行推理。",Ut,G,Jt,F,kt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_smashed_comparison.png"/>',Tt,X,Gt="优化后,我们可以使用Hugging Face Hub共享和加载优化后的模型。",ht,x,Mt,Y,ft,H,Xt='Pruna提供了<a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">EvaluationAgent</a>来评估优化后模型的质量。',Vt,L,xt="我们可以定义我们关心的指标,如总时间和吞吐量,以及要评估的数据集。我们可以定义一个模型并将其传递给<code>EvaluationAgent</code>。",jt,b,yt,q,Yt="现在您已经了解了如何优化和评估您的模型,可以开始使用 Pruna 来优化您自己的模型了。幸运的是,我们有许多示例来帮助您入门。",wt,B,Ht='<p>有关基准测试 Flux 的更多详细信息,请查看 <a href="https://huggingface.co/blog/PrunaAI/flux-fastest-image-generation-endpoint" rel="nofollow">宣布 FLUX-Juiced:最快的图像生成端点(快 2.6 倍)!</a> 博客文章和 <a href="https://huggingface.co/spaces/PrunaAI/InferBench" rel="nofollow">InferBench</a> 空间。</p>',Ct,z,gt,A,Lt='<li><a href="https://github.com/pruna-ai/pruna" rel="nofollow">Pruna</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 优化</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">Pruna 评估</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/tutorials/index.html" rel="nofollow">Pruna 教程</a></li>',Ft,P,bt,D,Bt;return n=new sl({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),U=new K({props:{title:"Pruna",local:"pruna",headingTag:"h1"}}),Z=new K({props:{title:"安装",local:"安装",headingTag:"h2"}}),W=new tt({props:{code:"cGlwJTIwaW5zdGFsbCUyMHBydW5h",highlighted:"pip install pruna",wrap:!1}}),S=new K({props:{title:"优化 Diffusers 模型",local:"优化-diffusers-模型",headingTag:"h2"}}),G=new tt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUyQyUyMFNtYXNoQ29uZmlnJTJDJTIwc21hc2glMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTVzZWdtaW5kJTJGU2VnbWluZC1WZWdhJUU2JTg4JTk2YmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtc2NobmVsbCUwQXBpcGUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU5JTg1JThEJUU3JUJEJUFFJTBBc21hc2hfY29uZmlnJTIwJTNEJTIwU21hc2hDb25maWcoKSUwQXNtYXNoX2NvbmZpZyU1QiUyMmZhY3Rvcml6ZXIlMjIlNUQlMjAlM0QlMjAlMjJxa3ZfZGlmZnVzZXJzJTIyJTBBc21hc2hfY29uZmlnJTVCJTIyY29tcGlsZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaF9jb21waWxlJTIyJTBBc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV90YXJnZXQlMjIlNUQlMjAlM0QlMjAlMjJtb2R1bGVfbGlzdCUyMiUwQXNtYXNoX2NvbmZpZyU1QiUyMmNhY2hlciUyMiU1RCUyMCUzRCUyMCUyMmZvcmElMjIlMEFzbWFzaF9jb25maWclNUIlMjJmb3JhX2ludGVydmFsJTIyJTVEJTIwJTNEJTIwMiUwQSUwQSUyMyUyMCVFNCVCOCVCQSVFNCVCQSU4NiVFOCU4RSVCNyVFNSVCRSU5NyVFNiU5QyU4MCVFNCVCRCVCMyVFOSU4MCU5RiVFNSVCQSVBNiVFNyVCQiU5MyVFNiU5RSU5QyVFRiVCQyU4QyVFNSU4RiVBRiVFNCVCQiVBNSVFNiVCNyVCQiVFNSU4QSVBMCVFOCVCRiU5OSVFNCVCQSU5QiVFOSU4NSU4RCVFNyVCRCVBRSUwQSUyMyUyMCVFNCVCRCU4NiVFNSVBRSU4MyVFNCVCQiVBQyVFNCVCQyU5QSVFNSVCMCU4NiVFOSVBMiU4NCVFNyU4MyVBRCVFNiU5NyVCNiVFOSU5NyVCNCVFNCVCQiU4RTEuNSVFNSU4OCU4NiVFOSU5MiU5RiVFNSVBMiU5RSVFNSU4QSVBMCVFNSU4OCVCMDEwJUU1JTg4JTg2JUU5JTkyJTlGJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV9tb2RlJTIyJTVEJTIwJTNEJTIwJTIybWF4LWF1dG90dW5lLW5vLWN1ZGFncmFwaHMlMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJxdWFudGl6ZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaGFvJTIyJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hhb19xdWFudF90eXBlJTIyJTVEJTIwJTNEJTIwJTIyZnA4ZHElMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJ0b3JjaGFvX2V4Y2x1ZGVkX21vZHVsZXMlMjIlNUQlMjAlM0QlMjAlMjJub3JtJTJCZW1iZWRkaW5nJTIyJTBBJTBBJTIzJTIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwc21hc2gocGlwZSUyQyUyMHNtYXNoX2NvbmZpZyklMEElMEElMjMlMjAlRTglQkYlOTAlRTglQTElOEMlRTYlQTglQTElRTUlOUUlOEIlMEFzbWFzaGVkX3BpcGUoJTIyYSUyMGtuaXR0ZWQlMjBwdXJwbGUlMjBwcnVuZSUyMikuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel, SmashConfig, smash
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试segmind/Segmind-Vega或black-forest-labs/FLUX.1-schnell</span>
pipe = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
torch_dtype=torch.bfloat16
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># 定义配置</span>
smash_config = SmashConfig()
smash_config[<span class="hljs-string">&quot;factorizer&quot;</span>] = <span class="hljs-string">&quot;qkv_diffusers&quot;</span>
smash_config[<span class="hljs-string">&quot;compiler&quot;</span>] = <span class="hljs-string">&quot;torch_compile&quot;</span>
smash_config[<span class="hljs-string">&quot;torch_compile_target&quot;</span>] = <span class="hljs-string">&quot;module_list&quot;</span>
smash_config[<span class="hljs-string">&quot;cacher&quot;</span>] = <span class="hljs-string">&quot;fora&quot;</span>
smash_config[<span class="hljs-string">&quot;fora_interval&quot;</span>] = <span class="hljs-number">2</span>
<span class="hljs-comment"># 为了获得最佳速度结果,可以添加这些配置</span>
<span class="hljs-comment"># 但它们会将预热时间从1.5分钟增加到10分钟</span>
<span class="hljs-comment"># smash_config[&quot;torch_compile_mode&quot;] = &quot;max-autotune-no-cudagraphs&quot;</span>
<span class="hljs-comment"># smash_config[&quot;quantizer&quot;] = &quot;torchao&quot;</span>
<span class="hljs-comment"># smash_config[&quot;torchao_quant_type&quot;] = &quot;fp8dq&quot;</span>
<span class="hljs-comment"># smash_config[&quot;torchao_excluded_modules&quot;] = &quot;norm+embedding&quot;</span>
<span class="hljs-comment"># 优化模型</span>
smashed_pipe = smash(pipe, smash_config)
<span class="hljs-comment"># 运行模型</span>
smashed_pipe(<span class="hljs-string">&quot;a knitted purple prune&quot;</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),x=new tt({props:{code:"JTIzJTIwJUU0JUJGJTlEJUU1JUFEJTk4JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlLnNhdmVfdG9faHViKCUyMiUzQ3VzZXJuYW1lJTNFJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQXNtYXNoZWRfcGlwZSUyMCUzRCUyMFBydW5hTW9kZWwuZnJvbV9odWIoJTIyJTNDdXNlcm5hbWUlM0UlMkZGTFVYLjEtZGV2LXNtYXNoZWQlMjIp",highlighted:`<span class="hljs-comment"># 保存模型</span>
smashed_pipe.save_to_hub(<span class="hljs-string">&quot;&lt;username&gt;/FLUX.1-dev-smashed&quot;</span>)
<span class="hljs-comment"># 加载模型</span>
smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">&quot;&lt;username&gt;/FLUX.1-dev-smashed&quot;</span>)`,wrap:!1}}),Y=new K({props:{title:"评估和基准测试Diffusers模型",local:"评估和基准测试diffusers模型",headingTag:"h2"}}),b=new nl({props:{id:"eval",options:["optimized model","standalone model"],$$slots:{default:[ol]},$$scope:{ctx:y}}}),z=new K({props:{title:"参考",local:"参考",headingTag:"h2"}}),P=new al({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/pruna.md"}}),{c(){i=m("meta"),J=p(),c=m("p"),r=p(),T(n.$$.fragment),a=p(),T(U.$$.fragment),N=p(),I=m("p"),I.innerHTML=Nt,lt=p(),$=m("table"),$.innerHTML=It,et=p(),Q=m("p"),Q.textContent=$t,st=p(),R=m("p"),R.innerHTML=Qt,at=p(),T(Z.$$.fragment),nt=p(),v=m("p"),v.textContent=Rt,it=p(),T(W.$$.fragment),pt=p(),T(S.$$.fragment),ot=p(),_=m("p"),_.textContent=Zt,rt=p(),w=m("div"),w.innerHTML=vt,ct=p(),E=m("p"),E.innerHTML=Wt,mt=p(),C=m("blockquote"),C.innerHTML=St,dt=p(),g=m("div"),g.innerHTML=_t,ut=p(),k=m("p"),k.innerHTML=Et,Ut=p(),T(G.$$.fragment),Jt=p(),F=m("div"),F.innerHTML=kt,Tt=p(),X=m("p"),X.textContent=Gt,ht=p(),T(x.$$.fragment),Mt=p(),T(Y.$$.fragment),ft=p(),H=m("p"),H.innerHTML=Xt,Vt=p(),L=m("p"),L.innerHTML=xt,jt=p(),T(b.$$.fragment),yt=p(),q=m("p"),q.textContent=Yt,wt=p(),B=m("blockquote"),B.innerHTML=Ht,Ct=p(),T(z.$$.fragment),gt=p(),A=m("ul"),A.innerHTML=Lt,Ft=p(),T(P.$$.fragment),bt=p(),D=m("p"),this.h()},l(t){const l=ll("svelte-u9bgzb",document.head);i=d(l,"META",{name:!0,content:!0}),l.forEach(e),J=o(t),c=d(t,"P",{}),zt(c).forEach(e),r=o(t),h(n.$$.fragment,t),a=o(t),h(U.$$.fragment,t),N=o(t),I=d(t,"P",{"data-svelte-h":!0}),u(I)!=="svelte-14zuif9"&&(I.innerHTML=Nt),lt=o(t),$=d(t,"TABLE",{"data-svelte-h":!0}),u($)!=="svelte-1870rsx"&&($.innerHTML=It),et=o(t),Q=d(t,"P",{"data-svelte-h":!0}),u(Q)!=="svelte-1syz01b"&&(Q.textContent=$t),st=o(t),R=d(t,"P",{"data-svelte-h":!0}),u(R)!=="svelte-uq0kan"&&(R.innerHTML=Qt),at=o(t),h(Z.$$.fragment,t),nt=o(t),v=d(t,"P",{"data-svelte-h":!0}),u(v)!=="svelte-1g5o862"&&(v.textContent=Rt),it=o(t),h(W.$$.fragment,t),pt=o(t),h(S.$$.fragment,t),ot=o(t),_=d(t,"P",{"data-svelte-h":!0}),u(_)!=="svelte-1oye83w"&&(_.textContent=Zt),rt=o(t),w=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(w)!=="svelte-1uhmbha"&&(w.innerHTML=vt),ct=o(t),E=d(t,"P",{"data-svelte-h":!0}),u(E)!=="svelte-1bmdjcc"&&(E.innerHTML=Wt),mt=o(t),C=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(C)!=="svelte-137yx3g"&&(C.innerHTML=St),dt=o(t),g=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(g)!=="svelte-1yaafaz"&&(g.innerHTML=_t),ut=o(t),k=d(t,"P",{"data-svelte-h":!0}),u(k)!=="svelte-146qhef"&&(k.innerHTML=Et),Ut=o(t),h(G.$$.fragment,t),Jt=o(t),F=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(F)!=="svelte-1or519q"&&(F.innerHTML=kt),Tt=o(t),X=d(t,"P",{"data-svelte-h":!0}),u(X)!=="svelte-1yw57hm"&&(X.textContent=Gt),ht=o(t),h(x.$$.fragment,t),Mt=o(t),h(Y.$$.fragment,t),ft=o(t),H=d(t,"P",{"data-svelte-h":!0}),u(H)!=="svelte-ufsph6"&&(H.innerHTML=Xt),Vt=o(t),L=d(t,"P",{"data-svelte-h":!0}),u(L)!=="svelte-76s5k8"&&(L.innerHTML=xt),jt=o(t),h(b.$$.fragment,t),yt=o(t),q=d(t,"P",{"data-svelte-h":!0}),u(q)!=="svelte-6gtl5s"&&(q.textContent=Yt),wt=o(t),B=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(B)!=="svelte-t41fkn"&&(B.innerHTML=Ht),Ct=o(t),h(z.$$.fragment,t),gt=o(t),A=d(t,"UL",{"data-svelte-h":!0}),u(A)!=="svelte-5ha4im"&&(A.innerHTML=Lt),Ft=o(t),h(P.$$.fragment,t),bt=o(t),D=d(t,"P",{}),zt(D).forEach(e),this.h()},h(){O(i,"name","hf:doc:metadata"),O(i,"content",cl),O(w,"class","flex justify-center"),O(C,"class","tip"),O(g,"class","flex justify-center"),O(F,"class","flex justify-center"),O(B,"class","tip")},m(t,l){el(document.head,i),s(t,J,l),s(t,c,l),s(t,r,l),M(n,t,l),s(t,a,l),M(U,t,l),s(t,N,l),s(t,I,l),s(t,lt,l),s(t,$,l),s(t,et,l),s(t,Q,l),s(t,st,l),s(t,R,l),s(t,at,l),M(Z,t,l),s(t,nt,l),s(t,v,l),s(t,it,l),M(W,t,l),s(t,pt,l),M(S,t,l),s(t,ot,l),s(t,_,l),s(t,rt,l),s(t,w,l),s(t,ct,l),s(t,E,l),s(t,mt,l),s(t,C,l),s(t,dt,l),s(t,g,l),s(t,ut,l),s(t,k,l),s(t,Ut,l),M(G,t,l),s(t,Jt,l),s(t,F,l),s(t,Tt,l),s(t,X,l),s(t,ht,l),M(x,t,l),s(t,Mt,l),M(Y,t,l),s(t,ft,l),s(t,H,l),s(t,Vt,l),s(t,L,l),s(t,jt,l),M(b,t,l),s(t,yt,l),s(t,q,l),s(t,wt,l),s(t,B,l),s(t,Ct,l),M(z,t,l),s(t,gt,l),s(t,A,l),s(t,Ft,l),M(P,t,l),s(t,bt,l),s(t,D,l),Bt=!0},p(t,[l]){const qt={};l&2&&(qt.$$scope={dirty:l,ctx:t}),b.$set(qt)},i(t){Bt||(f(n.$$.fragment,t),f(U.$$.fragment,t),f(Z.$$.fragment,t),f(W.$$.fragment,t),f(S.$$.fragment,t),f(G.$$.fragment,t),f(x.$$.fragment,t),f(Y.$$.fragment,t),f(b.$$.fragment,t),f(z.$$.fragment,t),f(P.$$.fragment,t),Bt=!0)},o(t){V(n.$$.fragment,t),V(U.$$.fragment,t),V(Z.$$.fragment,t),V(W.$$.fragment,t),V(S.$$.fragment,t),V(G.$$.fragment,t),V(x.$$.fragment,t),V(Y.$$.fragment,t),V(b.$$.fragment,t),V(z.$$.fragment,t),V(P.$$.fragment,t),Bt=!1},d(t){t&&(e(J),e(c),e(r),e(a),e(N),e(I),e(lt),e($),e(et),e(Q),e(st),e(R),e(at),e(nt),e(v),e(it),e(pt),e(ot),e(_),e(rt),e(w),e(ct),e(E),e(mt),e(C),e(dt),e(g),e(ut),e(k),e(Ut),e(Jt),e(F),e(Tt),e(X),e(ht),e(Mt),e(ft),e(H),e(Vt),e(L),e(jt),e(yt),e(q),e(wt),e(B),e(Ct),e(gt),e(A),e(Ft),e(bt),e(D)),e(i),j(n,t),j(U,t),j(Z,t),j(W,t),j(S,t),j(G,t),j(x,t),j(Y,t),j(b,t),j(z,t),j(P,t)}}}const cl='{"title":"Pruna","local":"pruna","sections":[{"title":"安装","local":"安装","sections":[],"depth":2},{"title":"优化 Diffusers 模型","local":"优化-diffusers-模型","sections":[],"depth":2},{"title":"评估和基准测试Diffusers模型","local":"评估和基准测试diffusers模型","sections":[],"depth":2},{"title":"参考","local":"参考","sections":[],"depth":2}],"depth":1}';function ml(y){return Dt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class hl extends Kt{constructor(i){super(),tl(this,i,ml,rl,Ot,{})}}export{hl as component};

Xet Storage Details

Size:
25.3 kB
·
Xet hash:
13afaf70c0f0e20fc5f1552e1363083ed2869e031510dddbde12926cc75e0d5c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.