Buckets:
| import{s as ve,o as ke,n as Se}from"../chunks/scheduler.94020406.js";import{S as Ae,i as Xe,g as p,s as n,r as J,m as $l,E as xe,h as M,f as e,c as a,j as Lt,u as c,x as i,n as El,k as vl,y as Ne,a as s,v as o,d as r,t as y,w}from"../chunks/index.a08c8d92.js";import{T as qt}from"../chunks/Tip.3b0aeee8.js";import{C as f}from"../chunks/CodeBlock.b23cf525.js";import{H as h,E as Fe}from"../chunks/EditOnGithub.b1bceb47.js";/* Ye: default-slot fragment handed to the Tip that is created with warning:!0 (see $$slots.default:[Ye] in Le). It owns a single static text node U. Fragment hooks: c() builds the node, l(m) rebuilds it from the existing node list m (hydration path), m(m,u) inserts it into target m before anchor u, d(m) removes it when the detach flag m is truthy. NOTE(review): $l, El, s and e are minified imports from ../chunks/index; presumably the text-create, text-claim, insert and detach helpers - confirm against that chunk. The Markdown-style link in the string goes through the text helper, so it is presumably shown as literal text rather than as a link - confirm. */function Ye(j){let U;return{c(){U=$l("어떤 파이프라인에서도 [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) 를 사용하는 것은 검은색 이미지를 생성할 수 있고, 순수한 float16 정밀도를 사용하는 것보다 항상 느리기 때문에 사용하지 않는 것이 좋습니다.")},l(m){U=El(m,"어떤 파이프라인에서도 [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) 를 사용하는 것은 검은색 이미지를 생성할 수 있고, 순수한 float16 정밀도를 사용하는 것보다 항상 느리기 때문에 사용하지 않는 것이 좋습니다.")},m(m,u){s(m,U,u)},d(m){m&&e(U)}}}/* He: default-slot fragment for Tip instance I in Le (no warning flag). Same single-text-node shape and hooks as Ye. The text is a multi-line template literal, and the identical literal appears in both c() and l(); keep the two copies in sync if the wording is ever changed. */function He(j){let U;return{c(){U=$l(`Attention slicing은 모델이 하나 이상의 어텐션 헤드를 사용하는 한, 배치 크기가 1인 경우에도 유용합니다. | |
| 하나 이상의 어텐션 헤드가 있는 경우 *QK^T* 어텐션 매트릭스는 상당한 양의 메모리를 절약할 수 있는 각 헤드에 대해 순차적으로 계산될 수 있습니다.`)},l(m){U=El(m,`Attention slicing은 모델이 하나 이상의 어텐션 헤드를 사용하는 한, 배치 크기가 1인 경우에도 유용합니다. | |
| 하나 이상의 어텐션 헤드가 있는 경우 *QK^T* 어텐션 매트릭스는 상당한 양의 메모리를 절약할 수 있는 각 헤드에 대해 순차적으로 계산될 수 있습니다.`)},m(m,u){s(m,U,u)},d(m){m&&e(U)}}}/* ze: default-slot fragment for Tip instance Z in Le. Builds three sibling nodes: leading text U, an anchor element m whose label is u and whose href "#model_offloading" is applied in h(), and trailing text d. In l(T) the anchor label is only rewritten when i(m) differs from "svelte-zbpoyt" (presumably the data-svelte-h content hash of the pre-rendered markup - confirm). p is the imported Se (presumably a no-op, since nothing in this fragment reads props - confirm). d(T) removes all three nodes when T is truthy. */function ze(j){let U,m,u="모델 오프로딩",d;return{c(){U=$l("또 다른 최적화 방법인 "),m=p("a"),m.textContent=u,d=$l("을 사용하는 것을 고려하십시오. 이는 훨씬 빠르지만 메모리 절약이 크지는 않습니다."),this.h()},l(T){U=El(T,"또 다른 최적화 방법인 "),m=M(T,"A",{href:!0,"data-svelte-h":!0}),i(m)!=="svelte-zbpoyt"&&(m.textContent=u),d=El(T,"을 사용하는 것을 고려하십시오. 이는 훨씬 빠르지만 메모리 절약이 크지는 않습니다."),this.h()},h(){vl(m,"href","#model_offloading")},m(T,b){s(T,U,b),s(T,m,b),s(T,d,b)},p:Se,d(T){T&&(e(U),e(m),e(d))}}}/* De: default-slot fragment for Tip instance B in Le. One static text node stating that 'accelerate' 0.17.0 or newer is required; hooks mirror Ye. */function De(j){let U;return{c(){U=$l("이 기능을 사용하려면 'accelerate' 버전 0.17.0 이상이 필요합니다.")},l(m){U=El(m,"이 기능을 사용하려면 'accelerate' 버전 0.17.0 이상이 필요합니다.")},m(m,u){s(m,U,u)},d(m){m&&e(U)}}}function Le(j){let U,m,u,d,T,b,R,Ot=`메모리 또는 속도에 대해 🤗 Diffusers <em>추론</em>을 최적화하기 위한 몇 가지 기술과 아이디어를 제시합니다. | |
| 일반적으로, memory-efficient attention을 위해 <a href="https://github.com/facebookresearch/xformers" rel="nofollow">xFormers</a> 사용을 추천하기 때문에, 추천하는 <a href="xformers">설치 방법</a>을 보고 설치해 보세요.`,kl,Q,Pt="다음 설정이 성능과 메모리에 미치는 영향에 대해 설명합니다.",Sl,V,Kt="<thead><tr><th></th> <th>지연시간</th> <th>속도 향상</th></tr></thead> <tbody><tr><td>별도 설정 없음</td> <td>9.50s</td> <td>x1</td></tr> <tr><td>cuDNN auto-tuner</td> <td>9.37s</td> <td>x1.01</td></tr> <tr><td>fp16</td> <td>3.61s</td> <td>x2.63</td></tr> <tr><td>Channels Last 메모리 형식</td> <td>3.30s</td> <td>x2.88</td></tr> <tr><td>traced UNet</td> <td>3.21s</td> <td>x2.96</td></tr> <tr><td>memory-efficient attention</td> <td>2.63s</td> <td>x3.61</td></tr></tbody>",Al,G,le='NVIDIA TITAN RTX에서 50 DDIM 스텝의 "a photo of an astronaut riding a horse on mars" 프롬프트로 512x512 크기의 단일 이미지를 생성하였습니다.',Xl,$,xl,E,te='<a href="https://developer.nvidia.com/cudnn" rel="nofollow">NVIDIA cuDNN</a>은 컨볼루션을 계산하는 많은 알고리즘을 지원합니다. Autotuner는 짧은 벤치마크를 실행하고 주어진 입력 크기에 대해 주어진 하드웨어에서 최고의 성능을 가진 커널을 선택합니다.',Nl,_,ee="<strong>컨볼루션 네트워크</strong>를 활용하고 있기 때문에 (다른 유형들은 현재 지원되지 않음), 다음 설정을 통해 추론 전에 cuDNN autotuner를 활성화할 수 있습니다:",Fl,W,Yl,g,Hl,v,se=`Ampere 및 이후 CUDA 장치에서 행렬곱 및 컨볼루션은 TensorFloat32(TF32) 모드를 사용하여 더 빠르지만 약간 덜 정확할 수 있습니다. | |
| 기본적으로 PyTorch는 컨볼루션에 대해 TF32 모드를 활성화하지만 행렬 곱셈은 활성화하지 않습니다. | |
| 네트워크에 완전한 float32 정밀도가 필요한 경우가 아니면 행렬 곱셈에 대해서도 이 설정을 활성화하는 것이 좋습니다. | |
| 이는 일반적으로 무시할 수 있는 수치의 정확도 손실이 있지만, 계산 속도를 크게 높일 수 있습니다. | |
| 그것에 대해 <a href="https://huggingface.co/docs/transformers/v4.18.0/en/performance#tf32" rel="nofollow">여기</a>서 더 읽을 수 있습니다. | |
| 추론하기 전에 다음을 추가하기만 하면 됩니다:`,zl,k,Dl,S,Ll,A,ne=`더 많은 GPU 메모리를 절약하고 더 빠른 속도를 얻기 위해 모델 가중치를 반정밀도(half precision)로 직접 불러오고 실행할 수 있습니다. | |
| 여기에는 <code>fp16</code>이라는 브랜치에 저장된 float16 버전의 가중치를 불러오고, 그 때 <code>float16</code> 유형을 사용하도록 PyTorch에 지시하는 작업이 포함됩니다.`,ql,X,Ol,C,Pl,x,Kl,N,ae="추가 메모리 절약을 위해, 한 번에 모두 계산하는 대신 단계적으로 계산을 수행하는 슬라이스 버전의 어텐션(attention)을 사용할 수 있습니다.",lt,I,tt,F,pe="각 헤드에 대해 순차적으로 어텐션 계산을 수행하려면, 다음과 같이 추론 전에 파이프라인에서 <code>enable_attention_slicing()</code>를 호출하면 됩니다:",et,Y,st,H,Me="추론 시간이 약 10% 느려지는 약간의 성능 저하가 있지만 이 방법을 사용하면 3.2GB 정도의 작은 VRAM으로도 Stable Diffusion을 사용할 수 있습니다!",nt,z,at,D,ie="제한된 VRAM에서 대규모 이미지 배치를 디코딩하거나 32개 이상의 이미지가 포함된 배치를 활성화하기 위해, 배치의 latent 이미지를 한 번에 하나씩 디코딩하는 슬라이스 VAE 디코드를 사용할 수 있습니다.",pt,L,me="이를 <code>enable_attention_slicing()</code> 또는 <code>enable_xformers_memory_efficient_attention()</code>과 결합하여 메모리 사용을 추가로 최소화할 수 있습니다.",Mt,q,Ue="VAE 디코드를 한 번에 하나씩 수행하려면 추론 전에 파이프라인에서 <code>enable_vae_slicing()</code>을 호출합니다. 예를 들어:",it,O,mt,P,Je="다중 이미지 배치에서 VAE 디코드가 약간의 성능 향상이 이루어집니다. 단일 이미지 배치에서는 성능 영향은 없습니다.",Ut,_l,Jt,K,ce="추가 메모리 절약을 위해 가중치를 CPU로 오프로드하고 순방향 전달을 수행할 때만 GPU로 로드할 수 있습니다.",ct,ll,oe="CPU 오프로딩을 수행하려면 <code>enable_sequential_cpu_offload()</code>를 호출하기만 하면 됩니다:",ot,tl,rt,el,re="그러면 메모리 소비를 3GB 미만으로 줄일 수 있습니다.",yt,sl,ye="참고로 이 방법은 전체 모델이 아닌 서브모듈 수준에서 작동합니다. 이는 메모리 소비를 최소화하는 가장 좋은 방법이지만 프로세스의 반복적 특성으로 인해 추론 속도가 훨씬 느립니다. 파이프라인의 UNet 구성 요소는 여러 번 실행됩니다(‘num_inference_steps’ 만큼). 매번 UNet의 서로 다른 서브모듈이 순차적으로 온로드된 다음 필요에 따라 오프로드되므로 메모리 이동 횟수가 많습니다.",wt,Z,Tt,nl,we="또한 ttention slicing과 연결해서 최소 메모리(< 2GB)로도 동작할 수 있습니다.",ut,al,ft,pl,Te='<strong>참고</strong>: ‘enable_sequential_cpu_offload()‘를 사용할 때, 미리 파이프라인을 CUDA로 이동하지 <strong>않는</strong> 것이 중요합니다.그렇지 않으면 메모리 소비의 이득이 최소화됩니다. 더 많은 정보를 위해 <a href="https://github.com/huggingface/diffusers/issues/1934" rel="nofollow">이 이슈</a>를 보세요.',jt,Wl,dt,Ml,ue='<a href="#sequential_offloading">순차적 CPU 오프로딩</a>은 이전 섹션에서 설명한 것처럼 많은 메모리를 보존하지만 필요에 따라 서브모듈을 GPU로 이동하고 새 모듈이 실행될 때 즉시 CPU로 반환되기 때문에 추론 속도가 느려집니다.',ht,il,fe="전체 모델 오프로딩은 각 모델의 구성 요소인 <em>modules</em>을 처리하는 대신, 전체 모델을 GPU로 이동하는 대안입니다. 
이로 인해 추론 시간에 미치는 영향은 미미하지만(파이프라인을 ‘cuda’로 이동하는 것과 비교하여) 여전히 약간의 메모리를 절약할 수 있습니다.",bt,ml,je=`이 시나리오에서는 파이프라인의 주요 구성 요소 중 하나만(일반적으로 텍스트 인코더, unet 및 vae) GPU에 있고, 나머지는 CPU에서 대기할 것입니다. | |
| 여러 반복을 위해 실행되는 UNet과 같은 구성 요소는 더 이상 필요하지 않을 때까지 GPU에 남아 있습니다.`,Ct,Ul,de="이 기능은 아래와 같이 파이프라인에서 <code>enable_model_cpu_offload()</code>를 호출하여 활성화할 수 있습니다.",It,Jl,Zt,cl,he="이는 추가적인 메모리 절약을 위한 attention slicing과도 호환됩니다.",Bt,ol,Rt,B,Qt,rl,Vt,yl,be=`Channels Last 메모리 형식은 차원 순서를 보존하는 메모리에서 NCHW 텐서 배열을 대체하는 방법입니다. | |
| Channels Last 텐서는 채널이 가장 조밀한 차원이 되는 방식으로 정렬됩니다(일명 픽셀당 이미지를 저장). | |
| 현재 모든 연산자 Channels Last 형식을 지원하는 것은 아니라 성능이 저하될 수 있으므로, 사용해보고 모델에 잘 작동하는지 확인하는 것이 좋습니다.`,Gt,wl,Ce="예를 들어 파이프라인의 UNet 모델이 channels Last 형식을 사용하도록 설정하려면 다음을 사용할 수 있습니다:",$t,Tl,Et,ul,_t,fl,Ie="추적은 모델을 통해 예제 입력 텐서를 통해 실행되는데, 해당 입력이 모델의 레이어를 통과할 때 호출되는 작업을 캡처하여 실행 파일 또는 ‘ScriptFunction’이 반환되도록 하고, 이는 just-in-time 컴파일로 최적화됩니다.",Wt,jl,Ze="UNet 모델을 추적하기 위해 다음을 사용할 수 있습니다:",gt,dl,vt,hl,Be="그 다음, 파이프라인의 <code>unet</code> 특성을 다음과 같이 추적된 모델로 바꿀 수 있습니다.",kt,bl,St,Cl,At,Il,Re=`어텐션 블록의 대역폭을 최적화하는 최근 작업으로 GPU 메모리 사용량이 크게 향상되고 향상되었습니다. | |
| @tridao의 가장 최근의 플래시 어텐션: <a href="https://github.com/HazyResearch/flash-attention" rel="nofollow">code</a>, <a href="https://arxiv.org/pdf/2205.14135.pdf" rel="nofollow">paper</a>.`,Xt,Zl,Qe="배치 크기 1(프롬프트 1개)의 512x512 크기로 추론을 실행할 때 몇 가지 Nvidia GPU에서 얻은 속도 향상은 다음과 같습니다:",xt,Bl,Ve="<thead><tr><th>GPU</th> <th>기준 어텐션 FP16</th> <th>메모리 효율적인 어텐션 FP16</th></tr></thead> <tbody><tr><td>NVIDIA Tesla T4</td> <td>3.5it/s</td> <td>5.5it/s</td></tr> <tr><td>NVIDIA 3060 RTX</td> <td>4.6it/s</td> <td>7.8it/s</td></tr> <tr><td>NVIDIA A10G</td> <td>8.88it/s</td> <td>15.6it/s</td></tr> <tr><td>NVIDIA RTX A6000</td> <td>11.7it/s</td> <td>21.09it/s</td></tr> <tr><td>NVIDIA TITAN RTX</td> <td>12.51it/s</td> <td>18.22it/s</td></tr> <tr><td>A100-SXM4-40GB</td> <td>18.6it/s</td> <td>29.it/s</td></tr> <tr><td>A100-SXM-80GB</td> <td>18.7it/s</td> <td>29.5it/s</td></tr></tbody>",Nt,Rl,Ge="이를 활용하려면 다음을 만족해야 합니다:",Ft,Ql,$e='<li>PyTorch > 1.12</li> <li>Cuda 사용 가능</li> <li><a href="xformers">xformers 라이브러리를 설치함</a></li>',Yt,Vl,Ht,Gl,zt,gl,Dt;return T=new h({props:{title:"메모리와 속도",local:"메모리와-속도",headingTag:"h1"}}),$=new h({props:{title:"cuDNN auto-tuner 활성화하기",local:"cudnn-auto-tuner-활성화하기",headingTag:"h2"}}),W=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRubi5iZW5jaG1hcmslMjAlM0QlMjBUcnVl",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cudnn.benchmark = <span class="hljs-literal">True</span>`,wrap:!1}}),g=new h({props:{title:"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)",local:"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서",headingTag:"h3"}}),k=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRhLm1hdG11bC5hbGxvd190ZjMyJTIwJTNEJTIwVHJ1ZQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cuda.matmul.allow_tf32 = <span class="hljs-literal">True</span>`,wrap:!1}}),S=new h({props:{title:"반정밀도 가중치",local:"반정밀도-가중치",headingTag:"h2"}}),X=new f({props:{code:"cGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEFwaXBlJTIwJTNEJTIwcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),C=new qt({props:{warning:!0,$$slots:{default:[Ye]},$$scope:{ctx:j}}}),x=new h({props:{title:"추가 메모리 절약을 위한 슬라이스 어텐션",local:"추가-메모리-절약을-위한-슬라이스-어텐션",headingTag:"h2"}}),I=new qt({props:{$$slots:{default:[He]},$$scope:{ctx:j}}}),Y=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMG9uJTIwbWFycyUyMiUwQXBpcGUuZW5hYmxlX2F0dGVudGlvbl9zbGljaW5nKCklMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_attention_slicing() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),z=new h({props:{title:"더 큰 배치를 위한 sliced VAE 디코드",local:"더-큰-배치를-위한-sliced-vae-디코드",headingTag:"h2"}}),O=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMG9uJTIwbWFycyUyMiUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEFpbWFnZXMlMjAlM0QlMjBwaXBlKCU1QnByb21wdCU1RCUyMColMjAzMikuaW1hZ2Vz",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_vae_slicing() | |
| images = pipe([prompt] * <span class="hljs-number">32</span>).images`,wrap:!1}}),tl=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKCklMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Z=new qt({props:{$$slots:{default:[ze]},$$scope:{ctx:j}}}),al=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKCklMEFwaXBlLmVuYWJsZV9hdHRlbnRpb25fc2xpY2luZygxKSUwQSUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Jl=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),ol=new f({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBcGlwZS5lbmFibGVfYXR0ZW50aW9uX3NsaWNpbmcoMSklMEElMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),B=new qt({props:{$$slots:{default:[De]},$$scope:{ctx:j}}}),rl=new h({props:{title:"Channels Last 메모리 형식 사용하기",local:"channels-last-메모리-형식-사용하기",headingTag:"h2"}}),Tl=new f({props:{code:"cHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSklMjAlMjAlMjMlMjAoMjg4MCUyQyUyMDklMkMlMjAzJTJDJTIwMSklMEFwaXBlLnVuZXQudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTIwJTIwJTIzJTIwaW4tcGxhY2UlMjAlRUMlOTclQjAlRUMlODIlQjAlMEElMjMlMjAyJUVCJUIyJTg4JUVDJUE3JUI4JTIwJUVDJUIwJUE4JUVDJTlCJTkwJUVDJTk3JTkwJUVDJTg0JTlDJTIwJUVDJThBJUE0JUVEJThBJUI4JUVCJTlEJUJDJUVDJTlEJUI0JUVCJTkzJTlDJTIwMSVFQyU5RCU4NCUyMCVFQSVCMCU4MCVFQyVBNyU4MCVFQiU4QSU5NCUyMCgyODgwJTJDJTIwMSUyQyUyMDk2MCUyQyUyMDMyMCklRUIlQTElOUMlMkMlMjAlRUMlOTclQjAlRUMlODIlQjAlRUMlOUQlQjQlMjAlRUMlOUUlOTElRUIlOEYlOTklRUQlOTUlQTglRUMlOUQlODQlMjAlRUMlQTYlOUQlRUIlQUElODUlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBcHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSk=",highlighted:`<span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride()) <span class="hljs-comment"># (2880, 9, 3, 1)</span> | |
| pipe.unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># in-place 연산</span> | |
| <span class="hljs-comment"># 2번째 차원에서 스트라이드 1을 가지는 (2880, 1, 960, 320)로, 연산이 작동함을 증명합니다.</span> | |
| <span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride())`,wrap:!1}}),ul=new h({props:{title:"추적(tracing)",local:"추적tracing",headingTag:"h2"}}),dl=new f({props:{code:"aW1wb3J0JTIwdGltZSUwQWltcG9ydCUyMHRvcmNoJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwZnVuY3Rvb2xzJTBBJTBBJTIzJTIwdG9yY2glMjAlRUElQjglQjAlRUMlOUElQjglRUElQjglQjAlMjAlRUIlQjklODQlRUQlOTklOUMlRUMlODQlQjElRUQlOTklOTQlMEF0b3JjaC5zZXRfZ3JhZF9lbmFibGVkKEZhbHNlKSUwQSUwQSUyMyUyMCVFQiVCMyU4MCVFQyU4OCU5OCUyMCVFQyU4NCVBNCVFQyVBMCU5NSUwQW5fZXhwZXJpbWVudHMlMjAlM0QlMjAyJTBBdW5ldF9ydW5zX3Blcl9leHBlcmltZW50JTIwJTNEJTIwNTAlMEElMEElMEElMjMlMjAlRUMlOUUlODUlRUIlQTAlQTUlMjAlRUIlQjYlODglRUIlOUYlQUMlRUMlOTglQTQlRUElQjglQjAlMEFkZWYlMjBnZW5lcmF0ZV9pbnB1dHMoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNCUyQyUyMDY0JTJDJTIwNjQpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjB0aW1lc3RlcCUyMCUzRCUyMHRvcmNoLnJhbmQoMSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTIwKiUyMDk5OSUwQSUyMCUyMCUyMCUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNzclMkMlMjA3NjgpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzYW1wbGUlMkMlMjB0aW1lc3RlcCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUwQSUwQSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBdW5ldCUyMCUzRCUyMHBpcGUudW5ldCUwQXVuZXQuZXZhbCgpJTBBdW5ldC50byhtZW1vcnlfZm9ybWF0JTNEdG9yY2guY2hhbm5lbHNfbGFzdCklMjAlMjAlMjMlMjBDaGFubmVscyUyMExhc3QlMjAlRUIlQTklOTQlRUIlQUElQTglRUIlQTYlQUMlMjAlRUQlOTglOTUlRUMlOEIlOUQlMjAlRUMlODIlQUMlRUMlOUElQTklMEF1bmV0LmZvcndhcmQlMjAlM0QlMjBmdW5jdG9vbHMucGFydGlhbCh1bmV0LmZvcndhcmQlMkMlMjByZXR1cm5fZGljdCUzREZhbHNlKSUyMCUyMCU
yMyUyMHJldHVybl9kaWN0JTNERmFsc2UlRUMlOUQlODQlMjAlRUElQjglQjAlRUIlQjMlQjglRUElQjAlOTIlRUMlOUMlQkMlRUIlQTElOUMlMjAlRUMlODQlQTQlRUMlQTAlOTUlMEElMEElMjMlMjAlRUMlOUIlOEMlRUIlQjAlOEQlRUMlOTclODUlMEFmb3IlMjBfJTIwaW4lMjByYW5nZSgzKSUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZ2VuZXJhdGVfaW5wdXRzKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXQoKmlucHV0cyklMEElMEElMjMlMjAlRUMlQjYlOTQlRUMlQTAlODElMEFwcmludCglMjJ0cmFjaW5nLi4lMjIpJTBBdW5ldF90cmFjZWQlMjAlM0QlMjB0b3JjaC5qaXQudHJhY2UodW5ldCUyQyUyMGlucHV0cyklMEF1bmV0X3RyYWNlZC5ldmFsKCklMEFwcmludCglMjJkb25lJTIwdHJhY2luZyUyMiklMEElMEElMEElMjMlMjAlRUMlOUIlOEMlRUIlQjAlOEQlRUMlOTclODUlMjAlRUIlQjAlOEYlMjAlRUElQjclQjglRUIlOUUlOTglRUQlOTQlODQlMjAlRUMlQjUlOUMlRUMlQTAlODElRUQlOTklOTQlMEFmb3IlMjBfJTIwaW4lMjByYW5nZSg1KSUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZ2VuZXJhdGVfaW5wdXRzKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXRfdHJhY2VkKCppbnB1dHMpJTBBJTBBJTBBJTIzJTIwJUVCJUIyJUE0JUVDJUI5JTk4JUVCJUE3JTg4JUVEJTgyJUI5JTBBd2l0aCUyMHRvcmNoLmluZmVyZW5jZV9tb2RlKCklM0ElMEElMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZShuX2V4cGVyaW1lbnRzKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRvcmNoLmN1ZGEuc3luY2hyb25pemUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0X3RpbWUlMjAlM0QlMjB0aW1lLnRpbWUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZvciUyMF8lMjBpbiUyMHJhbmdlKHVuZXRfcnVuc19wZXJfZXhwZXJpbWVudCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXRfdHJhY2VkKCppbnB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5zeW5jaHJvbml6ZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMnVuZXQlMjB0cmFjZWQlMjBpbmZlcmVuY2UlMjB0b29rJTIwJTdCdGltZS50aW1lKCklMjAtJTIwc3RhcnRfdGltZSUzQS4yZiU3RCUyMHNlY29uZHMlMjIpJTBBJTIwJTIwJTIwJTIwZm9yJTIwXyUyMGluJTIwcmFuZ2Uobl9leHBlcmltZW50cyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b3JjaC5jdWRhLnN5bmNocm9uaXplKCklMEE
lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGFydF90aW1lJTIwJTNEJTIwdGltZS50aW1lKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZSh1bmV0X3J1bnNfcGVyX2V4cGVyaW1lbnQpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3JpZ19vdXRwdXQlMjAlM0QlMjB1bmV0KCppbnB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5zeW5jaHJvbml6ZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMnVuZXQlMjBpbmZlcmVuY2UlMjB0b29rJTIwJTdCdGltZS50aW1lKCklMjAtJTIwc3RhcnRfdGltZSUzQS4yZiU3RCUyMHNlY29uZHMlMjIpJTBBJTBBJTIzJTIwJUVCJUFBJUE4JUVCJThEJUI4JTIwJUVDJUEwJTgwJUVDJTlFJUE1JTBBdW5ldF90cmFjZWQuc2F2ZSglMjJ1bmV0X3RyYWNlZC5wdCUyMik=",highlighted:`<span class="hljs-keyword">import</span> time | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> functools | |
| <span class="hljs-comment"># torch 기울기 비활성화</span> | |
| torch.set_grad_enabled(<span class="hljs-literal">False</span>) | |
| <span class="hljs-comment"># 변수 설정</span> | |
| n_experiments = <span class="hljs-number">2</span> | |
| unet_runs_per_experiment = <span class="hljs-number">50</span> | |
| <span class="hljs-comment"># 입력 불러오기</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_inputs</span>(): | |
| sample = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| timestep = torch.rand(<span class="hljs-number">1</span>, device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) * <span class="hljs-number">999</span> | |
| encoder_hidden_states = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">77</span>, <span class="hljs-number">768</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| <span class="hljs-keyword">return</span> sample, timestep, encoder_hidden_states | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| unet = pipe.unet | |
| unet.<span class="hljs-built_in">eval</span>() | |
| unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># Channels Last 메모리 형식 사용</span> | |
| unet.forward = functools.partial(unet.forward, return_dict=<span class="hljs-literal">False</span>) <span class="hljs-comment"># return_dict=False을 기본값으로 설정</span> | |
| <span class="hljs-comment"># 워밍업</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet(*inputs) | |
| <span class="hljs-comment"># 추적</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"tracing.."</span>) | |
| unet_traced = torch.jit.trace(unet, inputs) | |
| unet_traced.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"done tracing"</span>) | |
| <span class="hljs-comment"># 워밍업 및 그래프 최적화</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">5</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet_traced(*inputs) | |
| <span class="hljs-comment"># 벤치마킹</span> | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet_traced(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet traced inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-comment"># 모델 저장</span> | |
| unet_traced.save(<span class="hljs-string">"unet_traced.pt"</span>)`,wrap:!1}}),bl=new f({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGF0YWNsYXNzZXMlMjBpbXBvcnQlMjBkYXRhY2xhc3MlMEElMEElMEElNDBkYXRhY2xhc3MlMEFjbGFzcyUyMFVOZXQyRENvbmRpdGlvbk91dHB1dCUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUzQSUyMHRvcmNoLlRlbnNvciUwQSUwQSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwaml0dGVkJTIwdW5ldCUyMCVFQyU4MiVBQyVFQyU5QSVBOSUwQXVuZXRfdHJhY2VkJTIwJTNEJTIwdG9yY2guaml0LmxvYWQoJTIydW5ldF90cmFjZWQucHQlMjIpJTBBJTBBJTBBJTIzJTIwcGlwZS51bmV0JTIwJUVDJTgyJUFEJUVDJUEwJTlDJTBBY2xhc3MlMjBUcmFjZWRVTmV0KHRvcmNoLm5uLk1vZHVsZSklM0ElMEElMjAlMjAlMjAlMjBkZWYlMjBfX2luaXRfXyhzZWxmKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuaW5fY2hhbm5lbHMlMjAlM0QlMjBwaXBlLnVuZXQuY29uZmlnLmluX2NoYW5uZWxzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5kZXZpY2UlMjAlM0QlMjBwaXBlLnVuZXQuZGV2aWNlJTBBJTBBJTIwJTIwJTIwJTIwZGVmJTIwZm9yd2FyZChzZWxmJTJDJTIwbGF0ZW50X21vZGVsX2lucHV0JTJDJTIwdCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzYW1wbGUlMjAlM0QlMjB1bmV0X3RyYWNlZChsYXRlbnRfbW9kZWxfaW5wdXQlMkMlMjB0JTJDJTIwZW5jb2Rlcl9oaWRkZW5fc3RhdGVzKSU1QjAlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZXR1cm4lMjBVTmV0MkRDb25kaXRpb25PdXRwdXQoc2FtcGxlJTNEc2FtcGxlKSUwQSUwQSUwQXBpcGUudW5ldCUyMCUzRCUyMFRyYWNlZFVOZXQoKSUwQSUwQXdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwaW1hZ2UlMjAlM0QlMjBwaXBlKCU1QnByb21wdCU1RCUyMColMjAxJTJDJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDUwKS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> dataclass | |
| <span class="hljs-meta">@dataclass</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">UNet2DConditionOutput</span>: | |
| sample: torch.Tensor | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># jitted unet 사용</span> | |
| unet_traced = torch.jit.load(<span class="hljs-string">"unet_traced.pt"</span>) | |
| <span class="hljs-comment"># pipe.unet 삭제</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">TracedUNet</span>(torch.nn.Module): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-built_in">super</span>().__init__() | |
| self.in_channels = pipe.unet.config.in_channels | |
| self.device = pipe.unet.device | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, latent_model_input, t, encoder_hidden_states</span>): | |
| sample = unet_traced(latent_model_input, t, encoder_hidden_states)[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> UNet2DConditionOutput(sample=sample) | |
| pipe.unet = TracedUNet() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| image = pipe([prompt] * <span class="hljs-number">1</span>, num_inference_steps=<span class="hljs-number">50</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Cl=new h({props:{title:"Memory-efficient attention",local:"memory-efficient-attention",headingTag:"h2"}}),Vl=new f({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKS50byglMjJjdWRhJTIyKSUwQSUwQXBpcGUuZW5hYmxlX3hmb3JtZXJzX21lbW9yeV9lZmZpY2llbnRfYXR0ZW50aW9uKCklMEElMEF3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUyMCUzRCUyMHBpcGUoJTIyYSUyMHNtYWxsJTIwY2F0JTIyKSUwQSUwQSUyMyUyMCVFQyU4NCVBMCVFRCU4MyU5RCUzQSUyMCVFQyU5RCVCNCVFQiVBNSVCQyUyMCVFQiVCOSU4NCVFRCU5OSU5QyVFQyU4NCVCMSVFRCU5OSU5NCUyMCVFRCU5NSU5OCVFQSVCOCVCMCUyMCVFQyU5QyU4NCVFRCU5NSVCNCUyMCVFQiU4QiVBNCVFQyU5RCU4QyVFQyU5RCU4NCUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMjBwaXBlLmRpc2FibGVfeGZvcm1lcnNfbWVtb3J5X2VmZmljaWVudF9hdHRlbnRpb24oKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| sample = pipe(<span class="hljs-string">"a small cat"</span>) | |
| <span class="hljs-comment"># 선택: 이를 비활성화 하기 위해 다음을 사용할 수 있습니다.</span> | |
| <span class="hljs-comment"># pipe.disable_xformers_memory_efficient_attention()</span>`,wrap:!1}}),Gl=new Fe({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/ko/optimization/fp16.md"}}),{c(){U=p("meta"),m=n(),u=p("p"),d=n(),J(T.$$.fragment),b=n(),R=p("p"),R.innerHTML=Ot,kl=n(),Q=p("p"),Q.textContent=Pt,Sl=n(),V=p("table"),V.innerHTML=Kt,Al=n(),G=p("em"),G.textContent=le,Xl=n(),J($.$$.fragment),xl=n(),E=p("p"),E.innerHTML=te,Nl=n(),_=p("p"),_.innerHTML=ee,Fl=n(),J(W.$$.fragment),Yl=n(),J(g.$$.fragment),Hl=n(),v=p("p"),v.innerHTML=se,zl=n(),J(k.$$.fragment),Dl=n(),J(S.$$.fragment),Ll=n(),A=p("p"),A.innerHTML=ne,ql=n(),J(X.$$.fragment),Ol=n(),J(C.$$.fragment),Pl=n(),J(x.$$.fragment),Kl=n(),N=p("p"),N.textContent=ae,lt=n(),J(I.$$.fragment),tt=n(),F=p("p"),F.innerHTML=pe,et=n(),J(Y.$$.fragment),st=n(),H=p("p"),H.textContent=Me,nt=n(),J(z.$$.fragment),at=n(),D=p("p"),D.textContent=ie,pt=n(),L=p("p"),L.innerHTML=me,Mt=n(),q=p("p"),q.innerHTML=Ue,it=n(),J(O.$$.fragment),mt=n(),P=p("p"),P.textContent=Je,Ut=n(),_l=p("a"),Jt=$l(` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),K=p("p"),K.textContent=ce,ct=n(),ll=p("p"),ll.innerHTML=oe,ot=n(),J(tl.$$.fragment),rt=n(),el=p("p"),el.textContent=re,yt=n(),sl=p("p"),sl.textContent=ye,wt=n(),J(Z.$$.fragment),Tt=n(),nl=p("p"),nl.textContent=we,ut=n(),J(al.$$.fragment),ft=n(),pl=p("p"),pl.innerHTML=Te,jt=n(),Wl=p("a"),dt=$l(` | |
| ## 빠른 추론과 메모리 절약을 위한 모델 오프로딩 | |
| `),Ml=p("p"),Ml.innerHTML=ue,ht=n(),il=p("p"),il.innerHTML=fe,bt=n(),ml=p("p"),ml.textContent=je,Ct=n(),Ul=p("p"),Ul.innerHTML=de,It=n(),J(Jl.$$.fragment),Zt=n(),cl=p("p"),cl.textContent=he,Bt=n(),J(ol.$$.fragment),Rt=n(),J(B.$$.fragment),Qt=n(),J(rl.$$.fragment),Vt=n(),yl=p("p"),yl.textContent=be,Gt=n(),wl=p("p"),wl.textContent=Ce,$t=n(),J(Tl.$$.fragment),Et=n(),J(ul.$$.fragment),_t=n(),fl=p("p"),fl.textContent=Ie,Wt=n(),jl=p("p"),jl.textContent=Ze,gt=n(),J(dl.$$.fragment),vt=n(),hl=p("p"),hl.innerHTML=Be,kt=n(),J(bl.$$.fragment),St=n(),J(Cl.$$.fragment),At=n(),Il=p("p"),Il.innerHTML=Re,Xt=n(),Zl=p("p"),Zl.textContent=Qe,xt=n(),Bl=p("table"),Bl.innerHTML=Ve,Nt=n(),Rl=p("p"),Rl.textContent=Ge,Ft=n(),Ql=p("ul"),Ql.innerHTML=$e,Yt=n(),J(Vl.$$.fragment),Ht=n(),J(Gl.$$.fragment),zt=n(),gl=p("p"),this.h()},l(l){const t=xe("svelte-u9bgzb",document.head);U=M(t,"META",{name:!0,content:!0}),t.forEach(e),m=a(l),u=M(l,"P",{}),Lt(u).forEach(e),d=a(l),c(T.$$.fragment,l),b=a(l),R=M(l,"P",{"data-svelte-h":!0}),i(R)!=="svelte-m9figs"&&(R.innerHTML=Ot),kl=a(l),Q=M(l,"P",{"data-svelte-h":!0}),i(Q)!=="svelte-12e7d13"&&(Q.textContent=Pt),Sl=a(l),V=M(l,"TABLE",{"data-svelte-h":!0}),i(V)!=="svelte-1sy2nlq"&&(V.innerHTML=Kt),Al=a(l),G=M(l,"EM",{"data-svelte-h":!0}),i(G)!=="svelte-1iy2bqt"&&(G.textContent=le),Xl=a(l),c($.$$.fragment,l),xl=a(l),E=M(l,"P",{"data-svelte-h":!0}),i(E)!=="svelte-1q6vy6i"&&(E.innerHTML=te),Nl=a(l),_=M(l,"P",{"data-svelte-h":!0}),i(_)!=="svelte-1xwtu1p"&&(_.innerHTML=ee),Fl=a(l),c(W.$$.fragment,l),Yl=a(l),c(g.$$.fragment,l),Hl=a(l),v=M(l,"P",{"data-svelte-h":!0}),i(v)!=="svelte-1gm9l4i"&&(v.innerHTML=se),zl=a(l),c(k.$$.fragment,l),Dl=a(l),c(S.$$.fragment,l),Ll=a(l),A=M(l,"P",{"data-svelte-h":!0}),i(A)!=="svelte-1y9jw5r"&&(A.innerHTML=ne),ql=a(l),c(X.$$.fragment,l),Ol=a(l),c(C.$$.fragment,l),Pl=a(l),c(x.$$.fragment,l),Kl=a(l),N=M(l,"P",{"data-svelte-h":!0}),i(N)!=="svelte-97drxa"&&(N.textContent=ae),lt=a(l),c(I.$$.fragment,l),tt=a(l),F=M(l,"P",{"data-svelte-h":!0
}),i(F)!=="svelte-19j5lzh"&&(F.innerHTML=pe),et=a(l),c(Y.$$.fragment,l),st=a(l),H=M(l,"P",{"data-svelte-h":!0}),i(H)!=="svelte-1809mre"&&(H.textContent=Me),nt=a(l),c(z.$$.fragment,l),at=a(l),D=M(l,"P",{"data-svelte-h":!0}),i(D)!=="svelte-1klv9ve"&&(D.textContent=ie),pt=a(l),L=M(l,"P",{"data-svelte-h":!0}),i(L)!=="svelte-1bo4p0c"&&(L.innerHTML=me),Mt=a(l),q=M(l,"P",{"data-svelte-h":!0}),i(q)!=="svelte-j8mqed"&&(q.innerHTML=Ue),it=a(l),c(O.$$.fragment,l),mt=a(l),P=M(l,"P",{"data-svelte-h":!0}),i(P)!=="svelte-1l99s96"&&(P.textContent=Je),Ut=a(l),_l=M(l,"A",{name:!0}),Lt(_l).forEach(e),Jt=El(l,` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),K=M(l,"P",{"data-svelte-h":!0}),i(K)!=="svelte-131mxrq"&&(K.textContent=ce),ct=a(l),ll=M(l,"P",{"data-svelte-h":!0}),i(ll)!=="svelte-f0b7n3"&&(ll.innerHTML=oe),ot=a(l),c(tl.$$.fragment,l),rt=a(l),el=M(l,"P",{"data-svelte-h":!0}),i(el)!=="svelte-tablwz"&&(el.textContent=re),yt=a(l),sl=M(l,"P",{"data-svelte-h":!0}),i(sl)!=="svelte-1tbver6"&&(sl.textContent=ye),wt=a(l),c(Z.$$.fragment,l),Tt=a(l),nl=M(l,"P",{"data-svelte-h":!0}),i(nl)!=="svelte-1hfwhk5"&&(nl.textContent=we),ut=a(l),c(al.$$.fragment,l),ft=a(l),pl=M(l,"P",{"data-svelte-h":!0}),i(pl)!=="svelte-ax3hx7"&&(pl.innerHTML=Te),jt=a(l),Wl=M(l,"A",{name:!0}),Lt(Wl).forEach(e),dt=El(l,` | |
| ## 빠른 추론과 메모리 절약을 위한 모델 오프로딩 | |
| `),Ml=M(l,"P",{"data-svelte-h":!0}),i(Ml)!=="svelte-7dwxx7"&&(Ml.innerHTML=ue),ht=a(l),il=M(l,"P",{"data-svelte-h":!0}),i(il)!=="svelte-1llz1y7"&&(il.innerHTML=fe),bt=a(l),ml=M(l,"P",{"data-svelte-h":!0}),i(ml)!=="svelte-1hmauk"&&(ml.textContent=je),Ct=a(l),Ul=M(l,"P",{"data-svelte-h":!0}),i(Ul)!=="svelte-1gllwmo"&&(Ul.innerHTML=de),It=a(l),c(Jl.$$.fragment,l),Zt=a(l),cl=M(l,"P",{"data-svelte-h":!0}),i(cl)!=="svelte-5q5hse"&&(cl.textContent=he),Bt=a(l),c(ol.$$.fragment,l),Rt=a(l),c(B.$$.fragment,l),Qt=a(l),c(rl.$$.fragment,l),Vt=a(l),yl=M(l,"P",{"data-svelte-h":!0}),i(yl)!=="svelte-1rrl8zz"&&(yl.textContent=be),Gt=a(l),wl=M(l,"P",{"data-svelte-h":!0}),i(wl)!=="svelte-1c0oa55"&&(wl.textContent=Ce),$t=a(l),c(Tl.$$.fragment,l),Et=a(l),c(ul.$$.fragment,l),_t=a(l),fl=M(l,"P",{"data-svelte-h":!0}),i(fl)!=="svelte-1jkjfr1"&&(fl.textContent=Ie),Wt=a(l),jl=M(l,"P",{"data-svelte-h":!0}),i(jl)!=="svelte-mqbplb"&&(jl.textContent=Ze),gt=a(l),c(dl.$$.fragment,l),vt=a(l),hl=M(l,"P",{"data-svelte-h":!0}),i(hl)!=="svelte-1ewkmr2"&&(hl.innerHTML=Be),kt=a(l),c(bl.$$.fragment,l),St=a(l),c(Cl.$$.fragment,l),At=a(l),Il=M(l,"P",{"data-svelte-h":!0}),i(Il)!=="svelte-1lkmvkn"&&(Il.innerHTML=Re),Xt=a(l),Zl=M(l,"P",{"data-svelte-h":!0}),i(Zl)!=="svelte-1aa24j0"&&(Zl.textContent=Qe),xt=a(l),Bl=M(l,"TABLE",{"data-svelte-h":!0}),i(Bl)!=="svelte-13acbqe"&&(Bl.innerHTML=Ve),Nt=a(l),Rl=M(l,"P",{"data-svelte-h":!0}),i(Rl)!=="svelte-j9132p"&&(Rl.textContent=Ge),Ft=a(l),Ql=M(l,"UL",{"data-svelte-h":!0}),i(Ql)!=="svelte-gqxwyg"&&(Ql.innerHTML=$e),Yt=a(l),c(Vl.$$.fragment,l),Ht=a(l),c(Gl.$$.fragment,l),zt=a(l),gl=M(l,"P",{}),Lt(gl).forEach(e),this.h()},h(){vl(U,"name","hf:doc:metadata"),vl(U,"content",qe),vl(_l,"name","sequential_offloading"),vl(Wl,"name","model_offloading")},m(l,t){Ne(document.head,U),s(l,m,t),s(l,u,t),s(l,d,t),o(T,l,t),s(l,b,t),s(l,R,t),s(l,kl,t),s(l,Q,t),s(l,Sl,t),s(l,V,t),s(l,Al,t),s(l,G,t),s(l,Xl,t),o($,l,t),s(l,xl,t),s(l,E,t),s(l,Nl,t),s(l,_,t),s(l,Fl,t),o(W,l,t),s(l,Yl,t),o(g,l
,t),s(l,Hl,t),s(l,v,t),s(l,zl,t),o(k,l,t),s(l,Dl,t),o(S,l,t),s(l,Ll,t),s(l,A,t),s(l,ql,t),o(X,l,t),s(l,Ol,t),o(C,l,t),s(l,Pl,t),o(x,l,t),s(l,Kl,t),s(l,N,t),s(l,lt,t),o(I,l,t),s(l,tt,t),s(l,F,t),s(l,et,t),o(Y,l,t),s(l,st,t),s(l,H,t),s(l,nt,t),o(z,l,t),s(l,at,t),s(l,D,t),s(l,pt,t),s(l,L,t),s(l,Mt,t),s(l,q,t),s(l,it,t),o(O,l,t),s(l,mt,t),s(l,P,t),s(l,Ut,t),s(l,_l,t),s(l,Jt,t),s(l,K,t),s(l,ct,t),s(l,ll,t),s(l,ot,t),o(tl,l,t),s(l,rt,t),s(l,el,t),s(l,yt,t),s(l,sl,t),s(l,wt,t),o(Z,l,t),s(l,Tt,t),s(l,nl,t),s(l,ut,t),o(al,l,t),s(l,ft,t),s(l,pl,t),s(l,jt,t),s(l,Wl,t),s(l,dt,t),s(l,Ml,t),s(l,ht,t),s(l,il,t),s(l,bt,t),s(l,ml,t),s(l,Ct,t),s(l,Ul,t),s(l,It,t),o(Jl,l,t),s(l,Zt,t),s(l,cl,t),s(l,Bt,t),o(ol,l,t),s(l,Rt,t),o(B,l,t),s(l,Qt,t),o(rl,l,t),s(l,Vt,t),s(l,yl,t),s(l,Gt,t),s(l,wl,t),s(l,$t,t),o(Tl,l,t),s(l,Et,t),o(ul,l,t),s(l,_t,t),s(l,fl,t),s(l,Wt,t),s(l,jl,t),s(l,gt,t),o(dl,l,t),s(l,vt,t),s(l,hl,t),s(l,kt,t),o(bl,l,t),s(l,St,t),o(Cl,l,t),s(l,At,t),s(l,Il,t),s(l,Xt,t),s(l,Zl,t),s(l,xt,t),s(l,Bl,t),s(l,Nt,t),s(l,Rl,t),s(l,Ft,t),s(l,Ql,t),s(l,Yt,t),o(Vl,l,t),s(l,Ht,t),o(Gl,l,t),s(l,zt,t),s(l,gl,t),Dt=!0},p(l,[t]){const Ee={};t&2&&(Ee.$$scope={dirty:t,ctx:l}),C.$set(Ee);const _e={};t&2&&(_e.$$scope={dirty:t,ctx:l}),I.$set(_e);const We={};t&2&&(We.$$scope={dirty:t,ctx:l}),Z.$set(We);const 
ge={};t&2&&(ge.$$scope={dirty:t,ctx:l}),B.$set(ge)},i(l){Dt||(r(T.$$.fragment,l),r($.$$.fragment,l),r(W.$$.fragment,l),r(g.$$.fragment,l),r(k.$$.fragment,l),r(S.$$.fragment,l),r(X.$$.fragment,l),r(C.$$.fragment,l),r(x.$$.fragment,l),r(I.$$.fragment,l),r(Y.$$.fragment,l),r(z.$$.fragment,l),r(O.$$.fragment,l),r(tl.$$.fragment,l),r(Z.$$.fragment,l),r(al.$$.fragment,l),r(Jl.$$.fragment,l),r(ol.$$.fragment,l),r(B.$$.fragment,l),r(rl.$$.fragment,l),r(Tl.$$.fragment,l),r(ul.$$.fragment,l),r(dl.$$.fragment,l),r(bl.$$.fragment,l),r(Cl.$$.fragment,l),r(Vl.$$.fragment,l),r(Gl.$$.fragment,l),Dt=!0)},o(l){y(T.$$.fragment,l),y($.$$.fragment,l),y(W.$$.fragment,l),y(g.$$.fragment,l),y(k.$$.fragment,l),y(S.$$.fragment,l),y(X.$$.fragment,l),y(C.$$.fragment,l),y(x.$$.fragment,l),y(I.$$.fragment,l),y(Y.$$.fragment,l),y(z.$$.fragment,l),y(O.$$.fragment,l),y(tl.$$.fragment,l),y(Z.$$.fragment,l),y(al.$$.fragment,l),y(Jl.$$.fragment,l),y(ol.$$.fragment,l),y(B.$$.fragment,l),y(rl.$$.fragment,l),y(Tl.$$.fragment,l),y(ul.$$.fragment,l),y(dl.$$.fragment,l),y(bl.$$.fragment,l),y(Cl.$$.fragment,l),y(Vl.$$.fragment,l),y(Gl.$$.fragment,l),Dt=!1},d(l){l&&(e(m),e(u),e(d),e(b),e(R),e(kl),e(Q),e(Sl),e(V),e(Al),e(G),e(Xl),e(xl),e(E),e(Nl),e(_),e(Fl),e(Yl),e(Hl),e(v),e(zl),e(Dl),e(Ll),e(A),e(ql),e(Ol),e(Pl),e(Kl),e(N),e(lt),e(tt),e(F),e(et),e(st),e(H),e(nt),e(at),e(D),e(pt),e(L),e(Mt),e(q),e(it),e(mt),e(P),e(Ut),e(_l),e(Jt),e(K),e(ct),e(ll),e(ot),e(rt),e(el),e(yt),e(sl),e(wt),e(Tt),e(nl),e(ut),e(ft),e(pl),e(jt),e(Wl),e(dt),e(Ml),e(ht),e(il),e(bt),e(ml),e(Ct),e(Ul),e(It),e(Zt),e(cl),e(Bt),e(Rt),e(Qt),e(Vt),e(yl),e(Gt),e(wl),e($t),e(Et),e(_t),e(fl),e(Wt),e(jl),e(gt),e(vt),e(hl),e(kt),e(St),e(At),e(Il),e(Xt),e(Zl),e(xt),e(Bl),e(Nt),e(Rl),e(Ft),e(Ql),e(Yt),e(Ht),e(zt),e(gl)),e(U),w(T,l),w($,l),w(W,l),w(g,l),w(k,l),w(S,l),w(X,l),w(C,l),w(x,l),w(I,l),w(Y,l),w(z,l),w(O,l),w(tl,l),w(Z,l),w(al,l),w(Jl,l),w(ol,l),w(B,l),w(rl,l),w(Tl,l),w(ul,l),w(dl,l),w(bl,l),w(Cl,l),w(Vl,l),w(Gl,l)}}}const qe='{"title":"메모리와 
속도","local":"메모리와-속도","sections":[{"title":"cuDNN auto-tuner 활성화하기","local":"cudnn-auto-tuner-활성화하기","sections":[{"title":"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)","local":"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서","sections":[],"depth":3}],"depth":2},{"title":"반정밀도 가중치","local":"반정밀도-가중치","sections":[],"depth":2},{"title":"추가 메모리 절약을 위한 슬라이스 어텐션","local":"추가-메모리-절약을-위한-슬라이스-어텐션","sections":[],"depth":2},{"title":"더 큰 배치를 위한 sliced VAE 디코드","local":"더-큰-배치를-위한-sliced-vae-디코드","sections":[],"depth":2},{"title":"Channels Last 메모리 형식 사용하기","local":"channels-last-메모리-형식-사용하기","sections":[],"depth":2},{"title":"추적(tracing)","local":"추적tracing","sections":[],"depth":2},{"title":"Memory-efficient attention","local":"memory-efficient-attention","sections":[],"depth":2}],"depth":1}';function Oe(j){return ke(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ss extends Ae{constructor(U){super(),Xe(this,U,Oe,Le,ve,{})}}export{ss as component}; | |
Xet Storage Details
- Size:
- 49.6 kB
- Xet hash:
- 5b70a3c5e8f18cadb14d0d4ef32b83988abce96a673aaa23bb6ead7753c9293f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.