Buckets:
| import{s as ms,o as fs,n as hs}from"../chunks/scheduler.8c3d61f6.js";import{S as gs,i as us,g as r,s as n,r as d,m as ds,A as _s,h as l,f as t,c as s,j as u,u as c,x as p,n as cs,k as y,y as a,a as i,v as m,d as f,t as h,w as g}from"../chunks/index.da70eac4.js";import{T as bs}from"../chunks/Tip.1d9b8c37.js";import{D as Z}from"../chunks/Docstring.d7448bb3.js";import{C as w}from"../chunks/CodeBlock.a9c4becf.js";import{H as v,E as Ts}from"../chunks/getInferenceSnippets.1d18021a.js";function ys(It){let _,D='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){_=r("p"),_.innerHTML=D},l(x){_=l(x,"P",{"data-svelte-h":!0}),p(_)!=="svelte-1qn15hi"&&(_.innerHTML=D)},m(x,Ke){i(x,_,Ke)},p:hs,d(x){x&&t(_)}}}function ws(It){let _,D,x,Ke,Q,Ct,X,xn='<img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>',Xt,E,Zn='<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">Text2Video-Zero: Text-to-Image Diffusion Models are Zero-Shot Video Generators</a> is by Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, <a href="https://www.ece.utexas.edu/people/faculty/atlas-wang" rel="nofollow">Zhangyang Wang</a>, Shant Navasardyan, <a href="https://www.humphreyshi.com" rel="nofollow">Humphrey Shi</a>.',Wt,Y,Mn="Text2Video-Zero enables zero-shot video generation using either:",Gt,F,Jn="<li>A textual prompt</li> <li>A prompt combined with guidance from poses or edges</li> <li>Video Instruct-Pix2Pix (instruction-guided video editing)</li>",$t,H,Un="Results are temporally consistent and closely follow the guidance and textual prompts.",Nt,q,Vn='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/t2v_zero_teaser.png" alt="teaser-img"/>',St,A,jn="The abstract from the paper is:",Pt,O,kn=`<em>Recent text-to-video generation approaches rely on computationally heavy training and require large-scale video datasets. In this paper, we introduce a new task of zero-shot text-to-video generation and propose a low-cost approach (without any training or optimization) by leveraging the power of existing text-to-image synthesis methods (e.g., Stable Diffusion), making them suitable for the video domain. | |
| Our key modifications include (i) enriching the latent codes of the generated frames with motion dynamics to keep the global scene and the background time consistent; and (ii) reprogramming frame-level self-attention using a new cross-frame attention of each frame on the first frame, to preserve the context, appearance, and identity of the foreground object. | |
| Experiments show that this leads to low overhead, yet high-quality and remarkably consistent video generation. Moreover, our approach is not limited to text-to-video synthesis but is also applicable to other tasks such as conditional and content-specialized video generation, and Video Instruct-Pix2Pix, i.e., instruction-guided video editing. | |
| As experiments show, our method performs comparably or sometimes better than recent approaches, despite not being trained on additional video data.</em>`,Bt,K,In='You can find additional information about Text2Video-Zero on the <a href="https://text2video-zero.github.io/" rel="nofollow">project page</a>, <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, and <a href="https://github.com/Picsart-AI-Research/Text2Video-Zero" rel="nofollow">original codebase</a>.',Lt,ee,Rt,te,zt,oe,Cn="To generate a video from prompt, run the following Python code:",Dt,ne,Qt,se,Xn="You can change these parameters in the pipeline call:",Et,ae,Wn='<li>Motion field strength (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1):<ul><li><code>motion_field_strength_x</code> and <code>motion_field_strength_y</code>. Default: <code>motion_field_strength_x=12</code>, <code>motion_field_strength_y=12</code></li></ul></li> <li><code>T</code> and <code>T'</code> (see the <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1)<ul><li><code>t0</code> and <code>t1</code> in the range <code>{0, ..., num_inference_steps}</code>. Default: <code>t0=45</code>, <code>t1=48</code></li></ul></li> <li>Video length:<ul><li><code>video_length</code>, the number of frames video_length to be generated. Default: <code>video_length=8</code></li></ul></li>',Yt,ie,Gn="We can also generate longer videos by doing the processing in a chunk-by-chunk manner:",Ft,re,Ht,et,W,le,Bo,tt,$n="TextToVideoZeroSDXLPipeline",Lo,qt,pe,At,de,Ot,ce,Nn="To generate a video from prompt with additional pose control",Kt,ot,me,nt,Sn="Download a demo video",Ro,fe,eo,j,k,st,Pn="Read video containing extracted pose images",zo,he,Do,at,Bn='To extract pose from actual video, read <a href="controlnet">ControlNet documentation</a>.',Qo,ge,it,Ln="Run <code>StableDiffusionControlNetPipeline</code> with our custom attention processor",Eo,ue,to,rt,I,_e,Yo,lt,Rn="Since our attention processor also works with SDXL, it can be utilized to generate a video from prompt using ControlNet models powered by SDXL:",Fo,be,oo,Te,no,ye,zn='To generate a video from prompt with additional Canny edge control, follow the same steps described above for pose-guided generation using <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a>.',so,we,ao,ve,Dn='To perform text-guided video editing (with <a href="pix2pix">InstructPix2Pix</a>):',io,M,xe,pt,Qn="Download a demo video",Ho,Ze,qo,Me,dt,En="Read video from path",Ao,Je,Oo,Ue,ct,Yn="Run <code>StableDiffusionInstructPix2PixPipeline</code> with our custom attention processor",Ko,Ve,ro,je,lo,ke,Fn=`Methods <strong>Text-To-Video</strong>, <strong>Text-To-Video with Pose Control</strong> and <strong>Text-To-Video with Edge Control</strong> | |
| can run with custom <a href="../../training/dreambooth">DreamBooth</a> models, as shown below for | |
| <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a> and | |
| <a href="https://huggingface.co/PAIR/text2video-zero-controlnet-canny-avatar" rel="nofollow">Avatar style DreamBooth</a> model:`,po,J,Ie,mt,Hn="Download a demo video",en,Ce,tn,Xe,ft,qn="Read video from path",on,We,nn,Ge,ht,An="Run <code>StableDiffusionControlNetPipeline</code> with custom trained DreamBooth model",sn,$e,co,Ne,On='You can filter out some available DreamBooth-trained models with <a href="https://huggingface.co/models?search=dreambooth" rel="nofollow">this link</a>.',mo,G,fo,Se,ho,b,Pe,an,$,Be,rn,gt,Kn="The call function to the pipeline for generation.",ln,N,Le,pn,ut,es="Perform backward process given list of time steps.",dn,S,Re,cn,_t,ts="Encodes the prompt into text encoder hidden states.",mn,P,ze,fn,bt,os="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",go,De,uo,T,Qe,hn,B,Ee,gn,Tt,ns="Function invoked when calling the pipeline for generation.",un,L,Ye,_n,yt,ss="Perform backward process given list of time steps",bn,R,Fe,Tn,wt,as="Encodes the prompt into text encoder hidden states.",yn,z,He,wn,vt,is="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",_o,qe,bo,C,Ae,vn,xt,rs="Output class for zero-shot text-to-video pipeline.",To,Oe,yo,Vt,wo;return Q=new v({props:{title:"Text2Video-Zero",local:"text2video-zero",headingTag:"h1"}}),ee=new v({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),te=new v({props:{title:"Text-To-Video",local:"text-to-video",headingTag:"h3"}}),ne=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBpbWFnZWlvJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBwYW5kYSUyMGlzJTIwcGxheWluZyUyMGd1aXRhciUyMG9uJTIwdGltZXMlMjBzcXVhcmUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCkuaW1hZ2VzJTBBcmVzdWx0JTIwJTNEJTIwJTVCKHIlMjAqJTIwMjU1KS5hc3R5cGUoJTIydWludDglMjIpJTIwZm9yJTIwciUyMGluJTIwcmVzdWx0JTVEJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> imageio | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| result = pipe(prompt=prompt).images | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),re=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1plcm9QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBc2VlZCUyMCUzRCUyMDAlMEF2aWRlb19sZW5ndGglMjAlM0QlMjAyNCUyMCUyMCUyMzI0JTIwJUMzJUI3JTIwNGZwcyUyMCUzRCUyMDYlMjBzZWNvbmRzJTBBY2h1bmtfc2l6ZSUyMCUzRCUyMDglMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwcGFuZGElMjBpcyUyMHBsYXlpbmclMjBndWl0YXIlMjBvbiUyMHRpbWVzJTIwc3F1YXJlJTIyJTBBJTBBJTIzJTIwR2VuZXJhdGUlMjB0aGUlMjB2aWRlbyUyMGNodW5rLWJ5LWNodW5rJTBBcmVzdWx0JTIwJTNEJTIwJTVCJTVEJTBBY2h1bmtfaWRzJTIwJTNEJTIwbnAuYXJhbmdlKDAlMkMlMjB2aWRlb19sZW5ndGglMkMlMjBjaHVua19zaXplJTIwLSUyMDEpJTBBZ2VuZXJhdG9yJTIwJTNEJTIwdG9yY2guR2VuZXJhdG9yKGRldmljZSUzRCUyMmN1ZGElMjIpJTBBZm9yJTIwaSUyMGluJTIwcmFuZ2UobGVuKGNodW5rX2lkcykpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlByb2Nlc3NpbmclMjBjaHVuayUyMCU3QmklMjAlMkIlMjAxJTdEJTIwJTJGJTIwJTdCbGVuKGNodW5rX2lkcyklN0QlMjIpJTBBJTIwJTIwJTIwJTIwY2hfc3RhcnQlMjAlM0QlMjBjaHVua19pZHMlNUJpJTVEJTBBJTIwJTIwJTIwJTIwY2hfZW5kJTIwJTNEJTIwdmlkZW9fbGVuZ3RoJTIwaWYlMjBpJTIwJTNEJTNEJTIwbGVuKGNodW5rX2lkcyklMjAtJTIwMSUyMGVsc2UlMjBjaHVua19pZHMlNUJpJTIwJTJCJTIwMSU1RCUwQSUyMCUyMCUyMCUyMCUyMyUyMEF0dGFjaCUyMHRoZSUyMGZpcnN0JTIwZnJhbWUlMjBmb3IlMjBDcm9zcyUyMEZyYW1lJTIwQXR0ZW50aW9uJTBBJTIwJTIwJTIwJTIwZnJhbWVfaWRzJTIwJTNEJTIwJTVCMCU1RCUyMCUyQiUyMGxpc3QocmFuZ2UoY2hfc3RhcnQlMkMlMjBjaF9lbmQpKSUwQSUyMCUyMCUyMCUyMCUyMyUyMEZpeCUyMHRoZSUyMHNlZWQlMjBmb3IlMjB0aGUlMjB0ZW1wb3JhbCUyMGNvbnNpc3RlbmN5JTBBJTIwJTIwJTIwJTIwZ2VuZXJhdG9yLm1hbnVhbF9zZWVkKHNlZWQpJTBBJTIwJTIwJTIwJTIwb3V0cHV0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjB2aWRlb19sZW5ndGglM0RsZW4oZnJhbWVfaWRzKSUyQyUyMGdlbmVyYXRvciUzRGdlbmVyYXRvciUyQyUyMGZyYW1lX2lkcyUzRGZyYW1lX2lkcyklMEElMjAlMjAlMjAlMjByZXN1bHQuYXBwZW5kKG91dHB1dC5pbWFnZXMlNUIxJTNBJTVEKSUwQSUwQSUyMyUyMENvbmNhdGVuYXRlJTIwY2h1bmtzJTIwYW5kJTIwc2F2ZSUwQXJlc3VsdCUyMCUzRCUyMG5wLmNvbmNhdGVuYXRlKHJlc3VsdCklMEFyZXN1bHQlMjAlM0QlMjAlNUIociUyMColMjAyNTUpLmFzdHlwZSglMjJ1aW50OCUyMiklMjBmb3IlMjByJTIwaW4lMjByZXN1bHQlNUQlMEFpbWFnZWlvLm1pbXNhdmUoJTIydmlkZW8ubXA0JTIyJTJDJTIwcmVzdWx0JTJDJTIwZnBzJTNENCk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| seed = <span class="hljs-number">0</span> | |
| video_length = <span class="hljs-number">24</span> <span class="hljs-comment">#24 ÷ 4fps = 6 seconds</span> | |
| chunk_size = <span class="hljs-number">8</span> | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| <span class="hljs-comment"># Generate the video chunk-by-chunk</span> | |
| result = [] | |
| chunk_ids = np.arange(<span class="hljs-number">0</span>, video_length, chunk_size - <span class="hljs-number">1</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(chunk_ids)): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Processing chunk <span class="hljs-subst">{i + <span class="hljs-number">1</span>}</span> / <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk_ids)}</span>"</span>) | |
| ch_start = chunk_ids[i] | |
| ch_end = video_length <span class="hljs-keyword">if</span> i == <span class="hljs-built_in">len</span>(chunk_ids) - <span class="hljs-number">1</span> <span class="hljs-keyword">else</span> chunk_ids[i + <span class="hljs-number">1</span>] | |
| <span class="hljs-comment"># Attach the first frame for Cross Frame Attention</span> | |
| frame_ids = [<span class="hljs-number">0</span>] + <span class="hljs-built_in">list</span>(<span class="hljs-built_in">range</span>(ch_start, ch_end)) | |
| <span class="hljs-comment"># Fix the seed for the temporal consistency</span> | |
| generator.manual_seed(seed) | |
| output = pipe(prompt=prompt, video_length=<span class="hljs-built_in">len</span>(frame_ids), generator=generator, frame_ids=frame_ids) | |
| result.append(output.images[<span class="hljs-number">1</span>:]) | |
| <span class="hljs-comment"># Concatenate chunks and save</span> | |
| result = np.concatenate(result) | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),le=new v({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),pe=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHZhcmlhbnQlM0QlMjJmcDE2JTIyJTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSkudG8oJTIyY3VkYSUyMik=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroSDXLPipeline | |
| model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span> | |
| pipe = TextToVideoZeroSDXLPipeline.from_pretrained( | |
| model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),de=new v({props:{title:"Text-To-Video with Pose Control",local:"text-to-video-with-pose-control",headingTag:"h3"}}),fe=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBvc2VzX3NrZWxldG9uX2dpZnMlMkZkYW5jZTFfY29yci5tcDQlMjIlMEFyZXBvX2lkJTIwJTNEJTIwJTIyUEFJUiUyRlRleHQyVmlkZW8tWmVybyUyMiUwQXZpZGVvX3BhdGglMjAlM0QlMjBoZl9odWJfZG93bmxvYWQocmVwb190eXBlJTNEJTIyc3BhY2UlMjIlMkMlMjByZXBvX2lkJTNEcmVwb19pZCUyQyUyMGZpbGVuYW1lJTNEZmlsZW5hbWUp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/poses_skeleton_gifs/dance1_corr.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),he=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXBvc2VfaW1hZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| pose_images = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),ue=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LW9wZW5wb3NlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-openpose"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),_e=new v({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),be=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uWExDb250cm9sTmV0UGlwZWxpbmUlMkMlMjBDb250cm9sTmV0TW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQWNvbnRyb2xuZXRfbW9kZWxfaWQlMjAlM0QlMjAndGhpYmF1ZCUyRmNvbnRyb2xuZXQtb3BlbnBvc2Utc2R4bC0xLjAnJTBBbW9kZWxfaWQlMjAlM0QlMjAnc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJyUwQSUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGNvbnRyb2xuZXRfbW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTA5bW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJ2N1ZGEnKSUwQSUwQSUyMyUyMFNldCUyMHRoZSUyMGF0dGVudGlvbiUyMHByb2Nlc3NvciUwQXBpcGUudW5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQXBpcGUuY29udHJvbG5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQSUwQSUyMyUyMGZpeCUyMGxhdGVudHMlMjBmb3IlMjBhbGwlMjBmcmFtZXMlMEFsYXRlbnRzJTIwJTNEJTIwdG9yY2gucmFuZG4oKDElMkMlMjA0JTJDJTIwMTI4JTJDJTIwMTI4KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| controlnet_model_id = <span class="hljs-string">'thibaud/controlnet-openpose-sdxl-1.0'</span> | |
| model_id = <span class="hljs-string">'stabilityai/stable-diffusion-xl-base-1.0'</span> | |
| controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">'cuda'</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),Te=new v({props:{title:"Text-To-Video with Edge Control",local:"text-to-video-with-edge-control",headingTag:"h3"}}),we=new v({props:{title:"Video Instruct-Pix2Pix",local:"video-instruct-pix2pix",headingTag:"h3"}}),Ze=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBpeDJwaXglMjB2aWRlbyUyRmNhbWVsLm1wNCUyMiUwQXJlcG9faWQlMjAlM0QlMjAlMjJQQUlSJTJGVGV4dDJWaWRlby1aZXJvJTIyJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGhmX2h1Yl9kb3dubG9hZChyZXBvX3R5cGUlM0QlMjJzcGFjZSUyMiUyQyUyMHJlcG9faWQlM0RyZXBvX2lkJTJDJTIwZmlsZW5hbWUlM0RmaWxlbmFtZSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/pix2pix video/camel.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Je=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXZpZGVvJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| video = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),Ve=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uSW5zdHJ1Y3RQaXgyUGl4UGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIydGltYnJvb2tzJTJGaW5zdHJ1Y3QtcGl4MnBpeCUyMiUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25JbnN0cnVjdFBpeDJQaXhQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMykpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIybWFrZSUyMGl0JTIwVmFuJTIwR29naCUyMFN0YXJyeSUyME5pZ2h0JTIwc3R5bGUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4odmlkZW8pJTJDJTIwaW1hZ2UlM0R2aWRlbykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMmVkaXRlZF92aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"timbrooks/instruct-pix2pix"</span> | |
| pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">3</span>)) | |
| prompt = <span class="hljs-string">"make it Van Gogh Starry Night style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(video), image=video).images | |
| imageio.mimsave(<span class="hljs-string">"edited_video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),je=new v({props:{title:"DreamBooth specialization",local:"dreambooth-specialization",headingTag:"h3"}}),Ce=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRmNhbm55X3ZpZGVvc19tcDQlMkZnaXJsX3R1cm5pbmcubXA0JTIyJTBBcmVwb19pZCUyMCUzRCUyMCUyMlBBSVIlMkZUZXh0MlZpZGVvLVplcm8lMjIlMEF2aWRlb19wYXRoJTIwJTNEJTIwaGZfaHViX2Rvd25sb2FkKHJlcG9fdHlwZSUzRCUyMnNwYWNlJTIyJTJDJTIwcmVwb19pZCUzRHJlcG9faWQlMkMlMjBmaWxlbmFtZSUzRGZpbGVuYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/canny_videos_mp4/girl_turning.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),We=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQWNhbm55X2VkZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| canny_edges = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),$e=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEElMjMlMjBzZXQlMjBtb2RlbCUyMGlkJTIwdG8lMjBjdXN0b20lMjBtb2RlbCUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyUEFJUiUyRnRleHQydmlkZW8temVyby1jb250cm9sbmV0LWNhbm55LWF2YXRhciUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LWNhbm55JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4oY2FubnlfZWRnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMm9pbCUyMHBhaW50aW5nJTIwb2YlMjBhJTIwYmVhdXRpZnVsJTIwZ2lybCUyMGF2YXRhciUyMHN0eWxlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKGNhbm55X2VkZ2VzKSUyQyUyMGltYWdlJTNEY2FubnlfZWRnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| <span class="hljs-comment"># set model id to custom model</span> | |
| model_id = <span class="hljs-string">"PAIR/text2video-zero-controlnet-canny-avatar"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(canny_edges), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"oil painting of a beautiful girl avatar style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(canny_edges), image=canny_edges, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),G=new bs({props:{$$slots:{default:[ys]},$$scope:{ctx:It}}}),Se=new v({props:{title:"TextToVideoZeroPipeline",local:"diffusers.TextToVideoZeroPipeline",headingTag:"h2"}}),Pe=new Z({props:{name:"class diffusers.TextToVideoZeroPipeline",anchor:"diffusers.TextToVideoZeroPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L298"}}),Be=new Z({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": typing.Optional[int] = 1"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">DDIM</a> paper. Only | |
| applies to the <a href="/docs/diffusers/pr_11743/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>"latent"</code> and <code>"np"</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a | |
| <a href="/docs/diffusers/pr_11743/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput">TextToVideoPipelineOutput</a> instead of | |
| a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L545",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The output contains a <code>ndarray</code> of the generated video, when <code>output_type</code> != <code>"latent"</code>, otherwise a | |
| latent code of generated videos and a list of <code>bool</code>s indicating whether the corresponding generated | |
| video contains “not-safe-for-work” (nsfw) content..</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_11743/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput" | |
| >TextToVideoPipelineOutput</a></p> | |
| `}}),Le=new Z({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"cross_attention_kwargs",val:" = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L390",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Latents of backward process output at time timesteps[-1].</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Re=new Z({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L817"}}),ze=new Z({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L366",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),De=new v({props:{title:"TextToVideoZeroSDXLPipeline",local:"diffusers.TextToVideoZeroSDXLPipeline",headingTag:"h2"}}),Qe=new Z({props:{name:"class diffusers.TextToVideoZeroSDXLPipeline",anchor:"diffusers.TextToVideoZeroSDXLPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"text_encoder_2",val:": CLIPTextModelWithProjection"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"image_encoder",val:": CLIPVisionModelWithProjection = None"},{name:"feature_extractor",val:": CLIPImageProcessor = None"},{name:"force_zeros_for_empty_prompt",val:": bool = True"},{name:"add_watermarker",val:": typing.Optional[bool] = None"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L348"}}),Ee=new Z({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"video_length",val:": typing.Optional[int] = 8"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_inference_steps",val:": int = 50"},{name:"denoising_end",val:": typing.Optional[float] = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"negative_prompt_2",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"frame_ids",val:": typing.Optional[typing.List[int]] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": typing.Optional[str] = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"crops_coords_top_left",val:": typing.Tuple[int, int] = (0, 0)"},{name:"target_size",val:": typing.Optional[typing.Tuple[int, int]] = None"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>. | |
| instead.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.denoising_end",description:`<strong>denoising_end</strong> (<code>float</code>, <em>optional</em>) — | |
| When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be | |
| completed before it is intentionally prematurely terminated. As a result, the returned sample will | |
| still retain a substantial amount of noise as determined by the discrete timesteps selected by the | |
| scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a | |
| “Mixture of Denoisers” multi-pipeline setup, as elaborated in <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output" rel="nofollow"><strong>Refining the Image | |
| Output</strong></a>`,name:"denoising_end"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| Guidance scale as defined in <a href="https://huggingface.co/papers/2207.12598" rel="nofollow">Classifier-Free Diffusion | |
| Guidance</a>. <code>guidance_scale</code> is defined as <code>w</code> of equation 2. | |
| of <a href="https://huggingface.co/papers/2205.11487" rel="nofollow">Imagen Paper</a>. Guidance scale is enabled by setting | |
| <code>guidance_scale > 1</code>. Higher guidance scale encourages to generate images that are closely linked to | |
| the text <code>prompt</code>, usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://huggingface.co/papers/2010.02502" rel="nofollow">https://huggingface.co/papers/2010.02502</a>. Only | |
| applies to <a href="/docs/diffusers/pr_11743/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a>, will be ignored for others.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will ge generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"pil"</code>) — | |
| The output format of the generate image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput</code> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that will be called every <code>callback_steps</code> steps during inference. The function will be | |
| called with the following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function will be called. If not specified, the callback will be | |
| called at every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.7) — | |
| Guidance rescale factor proposed by <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a> <code>guidance_scale</code> is defined as <code>φ</code> in equation 16. of | |
| <a href="https://huggingface.co/papers/2305.08891" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. Guidance rescale factor should fix overexposure when | |
| using zero terminal SNR.`,name:"guidance_rescale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| If <code>original_size</code> is not the same as <code>target_size</code> the image will appear to be down- or upsampled. | |
| <code>original_size</code> defaults to <code>(width, height)</code> if not specified. Part of SDXL’s micro-conditioning as | |
| explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"original_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (0, 0)) — | |
| <code>crops_coords_top_left</code> can be used to generate an image that appears to be “cropped” from the position | |
| <code>crops_coords_top_left</code> downwards. Favorable, well-centered images are usually achieved by setting | |
| <code>crops_coords_top_left</code> to (0, 0). Part of SDXL’s micro-conditioning as explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"crops_coords_top_left"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| For most cases, <code>target_size</code> should be set to the desired height and width of the generated image. If | |
| not specified it will default to <code>(width, height)</code>. Part of SDXL’s micro-conditioning as explained in | |
| section 2.2 of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"target_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://huggingface.co/papers/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L951",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> or | |
| <code>tuple</code>: <code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> | |
| if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the | |
| generated images.</p> | |
| `}}),Ye=new Z({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"add_text_embeds",val:""},{name:"add_time_ids",val:""},{name:"cross_attention_kwargs",val:" = None"},{name:"guidance_rescale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.latents",description:`<strong>latents</strong> — | |
| Latents at time timesteps[0].`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.timesteps",description:`<strong>timesteps</strong> — | |
| Time steps along which to perform backward process.`,name:"timesteps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.prompt_embeds",description:`<strong>prompt_embeds</strong> — | |
| Pre-generated text embeddings.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.guidance_scale",description:`<strong>guidance_scale</strong> — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that calls every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.extra_step_kwargs",description:`<strong>extra_step_kwargs</strong> — | |
| Extra_step_kwargs.`,name:"extra_step_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.num_warmup_steps",description:`<strong>num_warmup_steps</strong> — | |
| number of warmup steps.`,name:"num_warmup_steps"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L862",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents of backward process output at time timesteps[-1]</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Fe=new Z({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"prompt_2",val:": typing.Optional[str] = None"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": typing.Optional[str] = None"},{name:"negative_prompt_2",val:": typing.Optional[str] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_pooled_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L599"}}),He=new Z({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.x_t0",description:`<strong>x_t0</strong> — | |
| Latent code at time t0.`,name:"x_t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t0",description:`<strong>t0</strong> — | |
| Timestep at t0.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.t1",description:`<strong>t1</strong> — | |
| Timestamp at t1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L838",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),qe=new v({props:{title:"TextToVideoPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",headingTag:"h2"}}),Ae=new Z({props:{name:"class diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",parameters:[{name:"images",val:": typing.Union[typing.List[PIL.Image.Image], numpy.ndarray]"},{name:"nsfw_content_detected",val:": typing.Optional[typing.List[bool]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.images",description:`<strong>images</strong> (<code>[List[PIL.Image.Image]</code>, <code>np.ndarray</code>]) — | |
| List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>[List[bool]]</code>) — | |
| List indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content or | |
| <code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/vr_11743/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L196"}}),Oe=new Ts({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video_zero.md"}}),{c(){_=r("meta"),D=n(),x=r("p"),Ke=n(),d(Q.$$.fragment),Ct=n(),X=r("div"),X.innerHTML=xn,Xt=n(),E=r("p"),E.innerHTML=Zn,Wt=n(),Y=r("p"),Y.textContent=Mn,Gt=n(),F=r("ol"),F.innerHTML=Jn,$t=n(),H=r("p"),H.textContent=Un,Nt=n(),q=r("p"),q.innerHTML=Vn,St=n(),A=r("p"),A.textContent=jn,Pt=n(),O=r("p"),O.innerHTML=kn,Bt=n(),K=r("p"),K.innerHTML=In,Lt=n(),d(ee.$$.fragment),Rt=n(),d(te.$$.fragment),zt=n(),oe=r("p"),oe.textContent=Cn,Dt=n(),d(ne.$$.fragment),Qt=n(),se=r("p"),se.textContent=Xn,Et=n(),ae=r("ul"),ae.innerHTML=Wn,Yt=n(),ie=r("p"),ie.textContent=Gn,Ft=n(),d(re.$$.fragment),Ht=n(),et=r("ul"),W=r("li"),d(le.$$.fragment),Bo=ds("In order to use the SDXL model when generating a video from prompt, use the "),tt=r("code"),tt.textContent=$n,Lo=ds(" pipeline:"),qt=n(),d(pe.$$.fragment),At=n(),d(de.$$.fragment),Ot=n(),ce=r("p"),ce.textContent=Nn,Kt=n(),ot=r("ol"),me=r("li"),nt=r("p"),nt.textContent=Sn,Ro=n(),d(fe.$$.fragment),eo=n(),j=r("ol"),k=r("li"),st=r("p"),st.textContent=Pn,zo=n(),d(he.$$.fragment),Do=n(),at=r("p"),at.innerHTML=Bn,Qo=n(),ge=r("li"),it=r("p"),it.innerHTML=Ln,Eo=n(),d(ue.$$.fragment),to=n(),rt=r("ul"),I=r("li"),d(_e.$$.fragment),Yo=n(),lt=r("p"),lt.textContent=Rn,Fo=n(),d(be.$$.fragment),oo=n(),d(Te.$$.fragment),no=n(),ye=r("p"),ye.innerHTML=zn,so=n(),d(we.$$.fragment),ao=n(),ve=r("p"),ve.innerHTML=Dn,io=n(),M=r("ol"),xe=r("li"),pt=r("p"),pt.textContent=Qn,Ho=n(),d(Ze.$$.fragment),qo=n(),Me=r("li"),dt=r("p"),dt.textContent=En,Ao=n(),d(Je.$$.fragment),Oo=n(),Ue=r("li"),ct=r("p"),ct.innerHTML=Yn,Ko=n(),d(Ve.$$.fragment),ro=n(),d(je.$$.fragment),lo=n(),ke=r("p"),ke.innerHTML=Fn,po=n(),J=r("ol"),Ie=r("li"),mt=r("p"),mt.textContent=Hn,en=n(),d(Ce.$$.fragment),tn=n(),Xe=r("li"),ft=r("p"),ft.textContent=qn,on=n(),d(We.$$.fragment),nn=n(),Ge=r("li"),ht=r("p"),ht.innerHTML=An,sn=n(),d($e.$$.fragment),co=n(),Ne=r("p"),Ne.innerHTML=On,mo=n(),d(G.$$.fragment),fo=n(),d(Se.$$.fragment),ho=n(),b=r("div"),d(Pe.$$.fragment),an=n(),$=r("div"),d(Be.$$.fragment),rn=n(),gt=r("p"),gt.textContent=Kn,ln=n(),N=r("div"),d(Le.$$.fragment),pn=n(),ut=r("p"),ut.textContent=es,dn=n(),S=r("div"),d(Re.$$.fragment),cn=n(),_t=r("p"),_t.textContent=ts,mn=n(),P=r("div"),d(ze.$$.fragment),fn=n(),bt=r("p"),bt.textContent=os,go=n(),d(De.$$.fragment),uo=n(),T=r("div"),d(Qe.$$.fragment),hn=n(),B=r("div"),d(Ee.$$.fragment),gn=n(),Tt=r("p"),Tt.textContent=ns,un=n(),L=r("div"),d(Ye.$$.fragment),_n=n(),yt=r("p"),yt.textContent=ss,bn=n(),R=r("div"),d(Fe.$$.fragment),Tn=n(),wt=r("p"),wt.textContent=as,yn=n(),z=r("div"),d(He.$$.fragment),wn=n(),vt=r("p"),vt.textContent=is,_o=n(),d(qe.$$.fragment),bo=n(),C=r("div"),d(Ae.$$.fragment),vn=n(),xt=r("p"),xt.textContent=rs,To=n(),d(Oe.$$.fragment),yo=n(),Vt=r("p"),this.h()},l(e){const o=_s("svelte-u9bgzb",document.head);_=l(o,"META",{name:!0,content:!0}),o.forEach(t),D=s(e),x=l(e,"P",{}),u(x).forEach(t),Ke=s(e),c(Q.$$.fragment,e),Ct=s(e),X=l(e,"DIV",{class:!0,"data-svelte-h":!0}),p(X)!=="svelte-si9ct8"&&(X.innerHTML=xn),Xt=s(e),E=l(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-16xpzkf"&&(E.innerHTML=Zn),Wt=s(e),Y=l(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-1jaz0ks"&&(Y.textContent=Mn),Gt=s(e),F=l(e,"OL",{"data-svelte-h":!0}),p(F)!=="svelte-19ca1wn"&&(F.innerHTML=Jn),$t=s(e),H=l(e,"P",{"data-svelte-h":!0}),p(H)!=="svelte-yhxhyq"&&(H.textContent=Un),Nt=s(e),q=l(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-uupw0l"&&(q.innerHTML=Vn),St=s(e),A=l(e,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1cwsb16"&&(A.textContent=jn),Pt=s(e),O=l(e,"P",{"data-svelte-h":!0}),p(O)!=="svelte-1rtg145"&&(O.innerHTML=kn),Bt=s(e),K=l(e,"P",{"data-svelte-h":!0}),p(K)!=="svelte-18vykkr"&&(K.innerHTML=In),Lt=s(e),c(ee.$$.fragment,e),Rt=s(e),c(te.$$.fragment,e),zt=s(e),oe=l(e,"P",{"data-svelte-h":!0}),p(oe)!=="svelte-1dpixty"&&(oe.textContent=Cn),Dt=s(e),c(ne.$$.fragment,e),Qt=s(e),se=l(e,"P",{"data-svelte-h":!0}),p(se)!=="svelte-rs2kss"&&(se.textContent=Xn),Et=s(e),ae=l(e,"UL",{"data-svelte-h":!0}),p(ae)!=="svelte-1w3gvi6"&&(ae.innerHTML=Wn),Yt=s(e),ie=l(e,"P",{"data-svelte-h":!0}),p(ie)!=="svelte-1xdujjh"&&(ie.textContent=Gn),Ft=s(e),c(re.$$.fragment,e),Ht=s(e),et=l(e,"UL",{});var jt=u(et);W=l(jt,"LI",{});var kt=u(W);c(le.$$.fragment,kt),Bo=cs(kt,"In order to use the SDXL model when generating a video from prompt, use the "),tt=l(kt,"CODE",{"data-svelte-h":!0}),p(tt)!=="svelte-7o0i0w"&&(tt.textContent=$n),Lo=cs(kt," pipeline:"),kt.forEach(t),jt.forEach(t),qt=s(e),c(pe.$$.fragment,e),At=s(e),c(de.$$.fragment,e),Ot=s(e),ce=l(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-13nlg5v"&&(ce.textContent=Nn),Kt=s(e),ot=l(e,"OL",{});var ls=u(ot);me=l(ls,"LI",{});var vo=u(me);nt=l(vo,"P",{"data-svelte-h":!0}),p(nt)!=="svelte-1xo2nq1"&&(nt.textContent=Sn),Ro=s(vo),c(fe.$$.fragment,vo),vo.forEach(t),ls.forEach(t),eo=s(e),j=l(e,"OL",{start:!0});var xo=u(j);k=l(xo,"LI",{});var Zt=u(k);st=l(Zt,"P",{"data-svelte-h":!0}),p(st)!=="svelte-7pm43o"&&(st.textContent=Pn),zo=s(Zt),c(he.$$.fragment,Zt),Do=s(Zt),at=l(Zt,"P",{"data-svelte-h":!0}),p(at)!=="svelte-mij11h"&&(at.innerHTML=Bn),Zt.forEach(t),Qo=s(xo),ge=l(xo,"LI",{});var Zo=u(ge);it=l(Zo,"P",{"data-svelte-h":!0}),p(it)!=="svelte-1wsv5bt"&&(it.innerHTML=Ln),Eo=s(Zo),c(ue.$$.fragment,Zo),Zo.forEach(t),xo.forEach(t),to=s(e),rt=l(e,"UL",{});var ps=u(rt);I=l(ps,"LI",{});var Mt=u(I);c(_e.$$.fragment,Mt),Yo=s(Mt),lt=l(Mt,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-bcy3mo"&&(lt.textContent=Rn),Fo=s(Mt),c(be.$$.fragment,Mt),Mt.forEach(t),ps.forEach(t),oo=s(e),c(Te.$$.fragment,e),no=s(e),ye=l(e,"P",{"data-svelte-h":!0}),p(ye)!=="svelte-1fgx3w6"&&(ye.innerHTML=zn),so=s(e),c(we.$$.fragment,e),ao=s(e),ve=l(e,"P",{"data-svelte-h":!0}),p(ve)!=="svelte-o0vmiz"&&(ve.innerHTML=Dn),io=s(e),M=l(e,"OL",{});var Jt=u(M);xe=l(Jt,"LI",{});var Mo=u(xe);pt=l(Mo,"P",{"data-svelte-h":!0}),p(pt)!=="svelte-1xo2nq1"&&(pt.textContent=Qn),Ho=s(Mo),c(Ze.$$.fragment,Mo),Mo.forEach(t),qo=s(Jt),Me=l(Jt,"LI",{});var Jo=u(Me);dt=l(Jo,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-1wln6n0"&&(dt.textContent=En),Ao=s(Jo),c(Je.$$.fragment,Jo),Jo.forEach(t),Oo=s(Jt),Ue=l(Jt,"LI",{});var Uo=u(Ue);ct=l(Uo,"P",{"data-svelte-h":!0}),p(ct)!=="svelte-14jy04z"&&(ct.innerHTML=Yn),Ko=s(Uo),c(Ve.$$.fragment,Uo),Uo.forEach(t),Jt.forEach(t),ro=s(e),c(je.$$.fragment,e),lo=s(e),ke=l(e,"P",{"data-svelte-h":!0}),p(ke)!=="svelte-x1pkth"&&(ke.innerHTML=Fn),po=s(e),J=l(e,"OL",{});var Ut=u(J);Ie=l(Ut,"LI",{});var Vo=u(Ie);mt=l(Vo,"P",{"data-svelte-h":!0}),p(mt)!=="svelte-1xo2nq1"&&(mt.textContent=Hn),en=s(Vo),c(Ce.$$.fragment,Vo),Vo.forEach(t),tn=s(Ut),Xe=l(Ut,"LI",{});var jo=u(Xe);ft=l(jo,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-1wln6n0"&&(ft.textContent=qn),on=s(jo),c(We.$$.fragment,jo),jo.forEach(t),nn=s(Ut),Ge=l(Ut,"LI",{});var ko=u(Ge);ht=l(ko,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-okpmrc"&&(ht.innerHTML=An),sn=s(ko),c($e.$$.fragment,ko),ko.forEach(t),Ut.forEach(t),co=s(e),Ne=l(e,"P",{"data-svelte-h":!0}),p(Ne)!=="svelte-18tnclt"&&(Ne.innerHTML=On),mo=s(e),c(G.$$.fragment,e),fo=s(e),c(Se.$$.fragment,e),ho=s(e),b=l(e,"DIV",{class:!0});var U=u(b);c(Pe.$$.fragment,U),an=s(U),$=l(U,"DIV",{class:!0});var Io=u($);c(Be.$$.fragment,Io),rn=s(Io),gt=l(Io,"P",{"data-svelte-h":!0}),p(gt)!=="svelte-50j04k"&&(gt.textContent=Kn),Io.forEach(t),ln=s(U),N=l(U,"DIV",{class:!0});var Co=u(N);c(Le.$$.fragment,Co),pn=s(Co),ut=l(Co,"P",{"data-svelte-h":!0}),p(ut)!=="svelte-1cxzr1t"&&(ut.textContent=es),Co.forEach(t),dn=s(U),S=l(U,"DIV",{class:!0});var Xo=u(S);c(Re.$$.fragment,Xo),cn=s(Xo),_t=l(Xo,"P",{"data-svelte-h":!0}),p(_t)!=="svelte-16q0ax1"&&(_t.textContent=ts),Xo.forEach(t),mn=s(U),P=l(U,"DIV",{class:!0});var Wo=u(P);c(ze.$$.fragment,Wo),fn=s(Wo),bt=l(Wo,"P",{"data-svelte-h":!0}),p(bt)!=="svelte-1d8vbe1"&&(bt.textContent=os),Wo.forEach(t),U.forEach(t),go=s(e),c(De.$$.fragment,e),uo=s(e),T=l(e,"DIV",{class:!0});var V=u(T);c(Qe.$$.fragment,V),hn=s(V),B=l(V,"DIV",{class:!0});var Go=u(B);c(Ee.$$.fragment,Go),gn=s(Go),Tt=l(Go,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-v78lg8"&&(Tt.textContent=ns),Go.forEach(t),un=s(V),L=l(V,"DIV",{class:!0});var $o=u(L);c(Ye.$$.fragment,$o),_n=s($o),yt=l($o,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-jp6j47"&&(yt.textContent=ss),$o.forEach(t),bn=s(V),R=l(V,"DIV",{class:!0});var No=u(R);c(Fe.$$.fragment,No),Tn=s(No),wt=l(No,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-16q0ax1"&&(wt.textContent=as),No.forEach(t),yn=s(V),z=l(V,"DIV",{class:!0});var So=u(z);c(He.$$.fragment,So),wn=s(So),vt=l(So,"P",{"data-svelte-h":!0}),p(vt)!=="svelte-1d8vbe1"&&(vt.textContent=is),So.forEach(t),V.forEach(t),_o=s(e),c(qe.$$.fragment,e),bo=s(e),C=l(e,"DIV",{class:!0});var Po=u(C);c(Ae.$$.fragment,Po),vn=s(Po),xt=l(Po,"P",{"data-svelte-h":!0}),p(xt)!=="svelte-1dgz4ei"&&(xt.textContent=rs),Po.forEach(t),To=s(e),c(Oe.$$.fragment,e),yo=s(e),Vt=l(e,"P",{}),u(Vt).forEach(t),this.h()},h(){y(_,"name","hf:doc:metadata"),y(_,"content",vs),y(X,"class","flex flex-wrap space-x-1"),y(j,"start","2"),y($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){a(document.head,_),i(e,D,o),i(e,x,o),i(e,Ke,o),m(Q,e,o),i(e,Ct,o),i(e,X,o),i(e,Xt,o),i(e,E,o),i(e,Wt,o),i(e,Y,o),i(e,Gt,o),i(e,F,o),i(e,$t,o),i(e,H,o),i(e,Nt,o),i(e,q,o),i(e,St,o),i(e,A,o),i(e,Pt,o),i(e,O,o),i(e,Bt,o),i(e,K,o),i(e,Lt,o),m(ee,e,o),i(e,Rt,o),m(te,e,o),i(e,zt,o),i(e,oe,o),i(e,Dt,o),m(ne,e,o),i(e,Qt,o),i(e,se,o),i(e,Et,o),i(e,ae,o),i(e,Yt,o),i(e,ie,o),i(e,Ft,o),m(re,e,o),i(e,Ht,o),i(e,et,o),a(et,W),m(le,W,null),a(W,Bo),a(W,tt),a(W,Lo),i(e,qt,o),m(pe,e,o),i(e,At,o),m(de,e,o),i(e,Ot,o),i(e,ce,o),i(e,Kt,o),i(e,ot,o),a(ot,me),a(me,nt),a(me,Ro),m(fe,me,null),i(e,eo,o),i(e,j,o),a(j,k),a(k,st),a(k,zo),m(he,k,null),a(k,Do),a(k,at),a(j,Qo),a(j,ge),a(ge,it),a(ge,Eo),m(ue,ge,null),i(e,to,o),i(e,rt,o),a(rt,I),m(_e,I,null),a(I,Yo),a(I,lt),a(I,Fo),m(be,I,null),i(e,oo,o),m(Te,e,o),i(e,no,o),i(e,ye,o),i(e,so,o),m(we,e,o),i(e,ao,o),i(e,ve,o),i(e,io,o),i(e,M,o),a(M,xe),a(xe,pt),a(xe,Ho),m(Ze,xe,null),a(M,qo),a(M,Me),a(Me,dt),a(Me,Ao),m(Je,Me,null),a(M,Oo),a(M,Ue),a(Ue,ct),a(Ue,Ko),m(Ve,Ue,null),i(e,ro,o),m(je,e,o),i(e,lo,o),i(e,ke,o),i(e,po,o),i(e,J,o),a(J,Ie),a(Ie,mt),a(Ie,en),m(Ce,Ie,null),a(J,tn),a(J,Xe),a(Xe,ft),a(Xe,on),m(We,Xe,null),a(J,nn),a(J,Ge),a(Ge,ht),a(Ge,sn),m($e,Ge,null),i(e,co,o),i(e,Ne,o),i(e,mo,o),m(G,e,o),i(e,fo,o),m(Se,e,o),i(e,ho,o),i(e,b,o),m(Pe,b,null),a(b,an),a(b,$),m(Be,$,null),a($,rn),a($,gt),a(b,ln),a(b,N),m(Le,N,null),a(N,pn),a(N,ut),a(b,dn),a(b,S),m(Re,S,null),a(S,cn),a(S,_t),a(b,mn),a(b,P),m(ze,P,null),a(P,fn),a(P,bt),i(e,go,o),m(De,e,o),i(e,uo,o),i(e,T,o),m(Qe,T,null),a(T,hn),a(T,B),m(Ee,B,null),a(B,gn),a(B,Tt),a(T,un),a(T,L),m(Ye,L,null),a(L,_n),a(L,yt),a(T,bn),a(T,R),m(Fe,R,null),a(R,Tn),a(R,wt),a(T,yn),a(T,z),m(He,z,null),a(z,wn),a(z,vt),i(e,_o,o),m(qe,e,o),i(e,bo,o),i(e,C,o),m(Ae,C,null),a(C,vn),a(C,xt),i(e,To,o),m(Oe,e,o),i(e,yo,o),i(e,Vt,o),wo=!0},p(e,[o]){const jt={};o&2&&(jt.$$scope={dirty:o,ctx:e}),G.$set(jt)},i(e){wo||(f(Q.$$.fragment,e),f(ee.$$.fragment,e),f(te.$$.fragment,e),f(ne.$$.fragment,e),f(re.$$.fragment,e),f(le.$$.fragment,e),f(pe.$$.fragment,e),f(de.$$.fragment,e),f(fe.$$.fragment,e),f(he.$$.fragment,e),f(ue.$$.fragment,e),f(_e.$$.fragment,e),f(be.$$.fragment,e),f(Te.$$.fragment,e),f(we.$$.fragment,e),f(Ze.$$.fragment,e),f(Je.$$.fragment,e),f(Ve.$$.fragment,e),f(je.$$.fragment,e),f(Ce.$$.fragment,e),f(We.$$.fragment,e),f($e.$$.fragment,e),f(G.$$.fragment,e),f(Se.$$.fragment,e),f(Pe.$$.fragment,e),f(Be.$$.fragment,e),f(Le.$$.fragment,e),f(Re.$$.fragment,e),f(ze.$$.fragment,e),f(De.$$.fragment,e),f(Qe.$$.fragment,e),f(Ee.$$.fragment,e),f(Ye.$$.fragment,e),f(Fe.$$.fragment,e),f(He.$$.fragment,e),f(qe.$$.fragment,e),f(Ae.$$.fragment,e),f(Oe.$$.fragment,e),wo=!0)},o(e){h(Q.$$.fragment,e),h(ee.$$.fragment,e),h(te.$$.fragment,e),h(ne.$$.fragment,e),h(re.$$.fragment,e),h(le.$$.fragment,e),h(pe.$$.fragment,e),h(de.$$.fragment,e),h(fe.$$.fragment,e),h(he.$$.fragment,e),h(ue.$$.fragment,e),h(_e.$$.fragment,e),h(be.$$.fragment,e),h(Te.$$.fragment,e),h(we.$$.fragment,e),h(Ze.$$.fragment,e),h(Je.$$.fragment,e),h(Ve.$$.fragment,e),h(je.$$.fragment,e),h(Ce.$$.fragment,e),h(We.$$.fragment,e),h($e.$$.fragment,e),h(G.$$.fragment,e),h(Se.$$.fragment,e),h(Pe.$$.fragment,e),h(Be.$$.fragment,e),h(Le.$$.fragment,e),h(Re.$$.fragment,e),h(ze.$$.fragment,e),h(De.$$.fragment,e),h(Qe.$$.fragment,e),h(Ee.$$.fragment,e),h(Ye.$$.fragment,e),h(Fe.$$.fragment,e),h(He.$$.fragment,e),h(qe.$$.fragment,e),h(Ae.$$.fragment,e),h(Oe.$$.fragment,e),wo=!1},d(e){e&&(t(D),t(x),t(Ke),t(Ct),t(X),t(Xt),t(E),t(Wt),t(Y),t(Gt),t(F),t($t),t(H),t(Nt),t(q),t(St),t(A),t(Pt),t(O),t(Bt),t(K),t(Lt),t(Rt),t(zt),t(oe),t(Dt),t(Qt),t(se),t(Et),t(ae),t(Yt),t(ie),t(Ft),t(Ht),t(et),t(qt),t(At),t(Ot),t(ce),t(Kt),t(ot),t(eo),t(j),t(to),t(rt),t(oo),t(no),t(ye),t(so),t(ao),t(ve),t(io),t(M),t(ro),t(lo),t(ke),t(po),t(J),t(co),t(Ne),t(mo),t(fo),t(ho),t(b),t(go),t(uo),t(T),t(_o),t(bo),t(C),t(To),t(yo),t(Vt)),t(_),g(Q,e),g(ee,e),g(te,e),g(ne,e),g(re,e),g(le),g(pe,e),g(de,e),g(fe),g(he),g(ue),g(_e),g(be),g(Te,e),g(we,e),g(Ze),g(Je),g(Ve),g(je,e),g(Ce),g(We),g($e),g(G,e),g(Se,e),g(Pe),g(Be),g(Le),g(Re),g(ze),g(De,e),g(Qe),g(Ee),g(Ye),g(Fe),g(He),g(qe,e),g(Ae),g(Oe,e)}}}const vs='{"title":"Text2Video-Zero","local":"text2video-zero","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"Text-To-Video","local":"text-to-video","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Pose Control","local":"text-to-video-with-pose-control","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Edge Control","local":"text-to-video-with-edge-control","sections":[],"depth":3},{"title":"Video Instruct-Pix2Pix","local":"video-instruct-pix2pix","sections":[],"depth":3},{"title":"DreamBooth specialization","local":"dreambooth-specialization","sections":[],"depth":3}],"depth":2},{"title":"TextToVideoZeroPipeline","local":"diffusers.TextToVideoZeroPipeline","sections":[],"depth":2},{"title":"TextToVideoZeroSDXLPipeline","local":"diffusers.TextToVideoZeroSDXLPipeline","sections":[],"depth":2},{"title":"TextToVideoPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function xs(It){return fs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ks extends gs{constructor(_){super(),us(this,_,xs,ws,ms,{})}}export{ks as component}; | |
Xet Storage Details
- Size:
- 97.6 kB
- Xet hash:
- 8f97397eb7d725afcda7181db8cea9bae5119821347e05b8ab647d11227c2d23
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.