| import{s as xs,o as ys,n as ws}from"../chunks/scheduler.8c3d61f6.js";import{S as Ms,i as Zs,g as i,s,r as p,m as Ts,A as Js,h as r,f as t,c as a,j as _,u as c,x as d,n as vs,k as y,y as n,a as l,v as m,d as f,t as h,w as u}from"../chunks/index.da70eac4.js";import{T as js}from"../chunks/Tip.1d9b8c37.js";import{D as J}from"../chunks/Docstring.ee4b6913.js";import{C as w}from"../chunks/CodeBlock.00a903b3.js";import{H as M,E as Us}from"../chunks/EditOnGithub.1e64e623.js";function ks(Wt){let T,L='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-components-across-pipelines">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){T=i("p"),T.innerHTML=L},l(Z){T=r(Z,"P",{"data-svelte-h":!0}),d(T)!=="svelte-1wmc0l4"&&(T.innerHTML=L)},m(Z,Oe){l(Z,T,Oe)},p:ws,d(Z){Z&&t(T)}}}function Vs(Wt){let T,L,Z,Oe,D,$t,Q,kn='<a href="https://huggingface.co/papers/2303.13439" rel="nofollow">Text2Video-Zero: Text-to-Image Diffusion Models are Zero-Shot Video Generators</a> is by Levon Khachatryan, Andranik Movsisyan, Vahram Tadevosyan, Roberto Henschel, <a href="https://www.ece.utexas.edu/people/faculty/atlas-wang" rel="nofollow">Zhangyang Wang</a>, Shant Navasardyan, <a href="https://www.humphreyshi.com" rel="nofollow">Humphrey Shi</a>.',Pt,E,Vn="Text2Video-Zero enables zero-shot video generation using either:",Gt,Y,In="<li>A textual prompt</li> <li>A prompt combined with guidance from poses or edges</li> <li>Video Instruct-Pix2Pix (instruction-guided video editing)</li>",St,F,Cn="Results are temporally consistent and closely follow the guidance and textual prompts.",Nt,H,Xn='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/t2v_zero_teaser.png" alt="teaser-img"/>',zt,A,Wn="The abstract from the paper is:",Bt,q,$n=`<em>Recent text-to-video generation approaches rely on computationally heavy training and require large-scale video datasets. In this paper, we introduce a new task of zero-shot text-to-video generation and propose a low-cost approach (without any training or optimization) by leveraging the power of existing text-to-image synthesis methods (e.g., Stable Diffusion), making them suitable for the video domain. | |
| Our key modifications include (i) enriching the latent codes of the generated frames with motion dynamics to keep the global scene and the background time consistent; and (ii) reprogramming frame-level self-attention using a new cross-frame attention of each frame on the first frame, to preserve the context, appearance, and identity of the foreground object. | |
| Experiments show that this leads to low overhead, yet high-quality and remarkably consistent video generation. Moreover, our approach is not limited to text-to-video synthesis but is also applicable to other tasks such as conditional and content-specialized video generation, and Video Instruct-Pix2Pix, i.e., instruction-guided video editing. | |
| As experiments show, our method performs comparably or sometimes better than recent approaches, despite not being trained on additional video data.</em>`,Rt,O,Pn='You can find additional information about Text2Video-Zero on the <a href="https://text2video-zero.github.io/" rel="nofollow">project page</a>, <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, and <a href="https://github.com/Picsart-AI-Research/Text2Video-Zero" rel="nofollow">original codebase</a>.',Lt,K,Dt,ee,Qt,te,Gn="To generate a video from prompt, run the following Python code:",Et,oe,Yt,ne,Sn="You can change these parameters in the pipeline call:",Ft,se,Nn='<li>Motion field strength (see the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1):<ul><li><code>motion_field_strength_x</code> and <code>motion_field_strength_y</code>. Default: <code>motion_field_strength_x=12</code>, <code>motion_field_strength_y=12</code></li></ul></li> <li><code>T</code> and <code>T'</code> (see the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1)<ul><li><code>t0</code> and <code>t1</code> in the range <code>{0, ..., num_inference_steps}</code>. Default: <code>t0=45</code>, <code>t1=48</code></li></ul></li> <li>Video length:<ul><li><code>video_length</code>, the number of frames video_length to be generated. Default: <code>video_length=8</code></li></ul></li>',Ht,ae,zn="We can also generate longer videos by doing the processing in a chunk-by-chunk manner:",At,ie,qt,Ke,X,re,Ro,et,Bn="TextToVideoZeroSDXLPipeline",Lo,Ot,le,Kt,de,eo,pe,Rn="To generate a video from prompt with additional pose control",to,tt,ce,ot,Ln="Download a demo video",Do,me,oo,k,V,nt,Dn="Read video containing extracted pose images",Qo,fe,Eo,st,Qn='To extract pose from actual video, read <a href="controlnet">ControlNet documentation</a>.',Yo,he,at,En="Run <code>StableDiffusionControlNetPipeline</code> with our custom attention processor",Fo,ue,no,it,I,_e,Ho,rt,Yn="Since our attention processor also works with SDXL, it can be utilized to generate a video from prompt using ControlNet models powered by SDXL:",Ao,ge,so,be,ao,Te,Fn='To generate a video from prompt with additional Canny edge control, follow the same steps described above for pose-guided generation using <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a>.',io,ve,ro,xe,Hn='To perform text-guided video editing (with <a href="pix2pix">InstructPix2Pix</a>):',lo,j,ye,lt,An="Download a demo video",qo,we,Oo,Me,dt,qn="Read video from path",Ko,Ze,en,Je,pt,On="Run <code>StableDiffusionInstructPix2PixPipeline</code> with our custom attention processor",tn,je,po,Ue,co,ke,Kn=`Methods <strong>Text-To-Video</strong>, <strong>Text-To-Video with Pose Control</strong> and <strong>Text-To-Video with Edge Control</strong> | |
| can run with custom <a href="../../training/dreambooth">DreamBooth</a> models, as shown below for the | |
| <a href="https://huggingface.co/lllyasviel/sd-controlnet-canny" rel="nofollow">Canny edge ControlNet model</a> and the | |
| <a href="https://huggingface.co/PAIR/text2video-zero-controlnet-canny-avatar" rel="nofollow">Avatar style DreamBooth</a> model:`,mo,U,Ve,ct,es="Download a demo video",on,Ie,nn,Ce,mt,ts="Read video from path",sn,Xe,an,We,ft,os="Run <code>StableDiffusionControlNetPipeline</code> with custom trained DreamBooth model",rn,$e,fo,Pe,ns='You can filter out some available DreamBooth-trained models with <a href="https://huggingface.co/models?search=dreambooth" rel="nofollow">this link</a>.',ho,W,uo,Ge,_o,g,Se,ln,ht,ss="Pipeline for zero-shot text-to-video generation using Stable Diffusion.",dn,ut,as=`This model inherits from <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,pn,$,Ne,cn,_t,is="The call function to the pipeline for generation.",mn,P,ze,fn,gt,rs="Perform backward process given list of time steps.",hn,G,Be,un,bt,ls="Encodes the prompt into text encoder hidden states.",_n,S,Re,gn,Tt,ds="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",go,Le,bo,b,De,bn,vt,ps="Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.",Tn,xt,cs=`This model inherits from <a href="/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,vn,N,Qe,xn,yt,ms="Function invoked when calling the pipeline for generation.",yn,z,Ee,wn,wt,fs="Perform backward process given list of time steps",Mn,B,Ye,Zn,Mt,hs="Encodes the prompt into text encoder hidden states.",Jn,R,Fe,jn,Zt,us="Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.",To,He,vo,C,Ae,Un,Jt,_s="Output class for zero-shot text-to-video pipeline.",xo,qe,yo,It,wo;return D=new M({props:{title:"Text2Video-Zero",local:"text2video-zero",headingTag:"h1"}}),K=new M({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),ee=new M({props:{title:"Text-To-Video",local:"text-to-video",headingTag:"h3"}}),oe=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTBBcGlwZSUyMCUzRCUyMFRleHRUb1ZpZGVvWmVyb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNikudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwcGFuZGElMjBpcyUyMHBsYXlpbmclMjBndWl0YXIlMjBvbiUyMHRpbWVzJTIwc3F1YXJlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0Rwcm9tcHQpLmltYWdlcyUwQXJlc3VsdCUyMCUzRCUyMCU1QihyJTIwKiUyMDI1NSkuYXN0eXBlKCUyMnVpbnQ4JTIyKSUyMGZvciUyMHIlMjBpbiUyMHJlc3VsdCU1RCUwQWltYWdlaW8ubWltc2F2ZSglMjJ2aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| model_id = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
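<span class="hljs-comment"># Optional (see the parameter list below): motion_field_strength_x / motion_field_strength_y, t0, t1 and video_length</span>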
| result = pipe(prompt=prompt).images | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),ie=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvUGlwZWxpbmUlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTBBcGlwZSUyMCUzRCUyMFRleHRUb1ZpZGVvWmVyb1BpcGVsaW5lLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNikudG8oJTIyY3VkYSUyMiklMEFzZWVkJTIwJTNEJTIwMCUwQXZpZGVvX2xlbmd0aCUyMCUzRCUyMDI0JTIwJTIwJTIzMjQlMjAlQzMlQjclMjA0ZnBzJTIwJTNEJTIwNiUyMHNlY29uZHMlMEFjaHVua19zaXplJTIwJTNEJTIwOCUwQXByb21wdCUyMCUzRCUyMCUyMkElMjBwYW5kYSUyMGlzJTIwcGxheWluZyUyMGd1aXRhciUyMG9uJTIwdGltZXMlMjBzcXVhcmUlMjIlMEElMEElMjMlMjBHZW5lcmF0ZSUyMHRoZSUyMHZpZGVvJTIwY2h1bmstYnktY2h1bmslMEFyZXN1bHQlMjAlM0QlMjAlNUIlNUQlMEFjaHVua19pZHMlMjAlM0QlMjBucC5hcmFuZ2UoMCUyQyUyMHZpZGVvX2xlbmd0aCUyQyUyMGNodW5rX3NpemUlMjAtJTIwMSklMEFnZW5lcmF0b3IlMjAlM0QlMjB0b3JjaC5HZW5lcmF0b3IoZGV2aWNlJTNEJTIyY3VkYSUyMiklMEFmb3IlMjBpJTIwaW4lMjByYW5nZShsZW4oY2h1bmtfaWRzKSklM0ElMEElMjAlMjAlMjAlMjBwcmludChmJTIyUHJvY2Vzc2luZyUyMGNodW5rJTIwJTdCaSUyMCUyQiUyMDElN0QlMjAlMkYlMjAlN0JsZW4oY2h1bmtfaWRzKSU3RCUyMiklMEElMjAlMjAlMjAlMjBjaF9zdGFydCUyMCUzRCUyMGNodW5rX2lkcyU1QmklNUQlMEElMjAlMjAlMjAlMjBjaF9lbmQlMjAlM0QlMjB2aWRlb19sZW5ndGglMjBpZiUyMGklMjAlM0QlM0QlMjBsZW4oY2h1bmtfaWRzKSUyMC0lMjAxJTIwZWxzZSUyMGNodW5rX2lkcyU1QmklMjAlMkIlMjAxJTVEJTBBJTIwJTIwJTIwJTIwJTIzJTIwQXR0YWNoJTIwdGhlJTIwZmlyc3QlMjBmcmFtZSUyMGZvciUyMENyb3NzJTIwRnJhbWUlMjBBdHRlbnRpb24lMEElMjAlMjAlMjAlMjBmcmFtZV9pZHMlMjAlM0QlMjAlNUIwJTVEJTIwJTJCJTIwbGlzdChyYW5nZShjaF9zdGFydCUyQyUyMGNoX2VuZCkpJTBBJTIwJTIwJTIwJTIwJTIzJTIwRml4JTIwdGhlJTIwc2VlZCUyMGZvciUyMHRoZSUyMHRlbXBvcmFsJTIwY29uc2lzdGVuY3klMEElMjAlMjAlMjAlMjBnZW5lcmF0b3IubWFudWFsX3NlZWQoc2VlZCklMEElMjAlMjAlMjAlMjBvdXRwdXQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCUyQyUyMHZpZGVvX2xlbmd0aCUzRGxlbihmcmFtZV9pZHMpJTJDJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yJTJDJTIwZnJhbWVfaWRzJTNEZnJhbWVfaWRzKSUwQSUyMCUyMCUyMCUyMHJlc3VsdC5hcHBlbmQob3V0cHV0LmltYWdlcyU1QjElM0ElNUQpJTBBJTBBJTIzJTIwQ29uY2F0ZW5hdGUlMjBjaHVua3MlMjBhbmQlMjBzYXZlJTBBcmVzdWx0JTIwJTNEJTIwbnAuY29uY2F0ZW5hdGUocmVzdWx0KSUwQXJlc3VsdCUyMCUzRCUyMCU1QihyJTIwKiUyMDI1NSkuYXN0eXBlKCUyMnVpbnQ4JTIyKSUyMGZvciUyMHIlMjBpbiUyMHJlc3VsdCU1RCUwQWltYWdlaW8ubWltc2F2ZSglMjJ2aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroPipeline | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| model_id = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| seed = <span class="hljs-number">0</span> | |
| video_length = <span class="hljs-number">24</span> <span class="hljs-comment"># 24 frames ÷ 4 fps = 6 seconds</span> | |
| chunk_size = <span class="hljs-number">8</span> | |
| prompt = <span class="hljs-string">"A panda is playing guitar on times square"</span> | |
| <span class="hljs-comment"># Generate the video chunk-by-chunk</span> | |
| result = [] | |
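<span class="hljs-comment"># Step by chunk_size - 1: frame 0 is re-attached to every chunk below, so each call still processes at most chunk_size frames</span>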
| chunk_ids = np.arange(<span class="hljs-number">0</span>, video_length, chunk_size - <span class="hljs-number">1</span>) | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(chunk_ids)): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Processing chunk <span class="hljs-subst">{i + <span class="hljs-number">1</span>}</span> / <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk_ids)}</span>"</span>) | |
| ch_start = chunk_ids[i] | |
| ch_end = video_length <span class="hljs-keyword">if</span> i == <span class="hljs-built_in">len</span>(chunk_ids) - <span class="hljs-number">1</span> <span class="hljs-keyword">else</span> chunk_ids[i + <span class="hljs-number">1</span>] | |
| <span class="hljs-comment"># Attach the first frame for Cross Frame Attention</span> | |
| frame_ids = [<span class="hljs-number">0</span>] + <span class="hljs-built_in">list</span>(<span class="hljs-built_in">range</span>(ch_start, ch_end)) | |
| <span class="hljs-comment"># Fix the seed for the temporal consistency</span> | |
| generator.manual_seed(seed) | |
| output = pipe(prompt=prompt, video_length=<span class="hljs-built_in">len</span>(frame_ids), generator=generator, frame_ids=frame_ids) | |
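<span class="hljs-comment"># Drop the re-attached anchor frame (index 0); it was only needed for cross-frame attention</span>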
| result.append(output.images[<span class="hljs-number">1</span>:]) | |
| <span class="hljs-comment"># Concatenate chunks and save</span> | |
| result = np.concatenate(result) | |
| result = [(r * <span class="hljs-number">255</span>).astype(<span class="hljs-string">"uint8"</span>) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> result] | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),re=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),le=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMEFwaXBlJTIwJTNEJTIwVGV4dFRvVmlkZW9aZXJvU0RYTFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHZhcmlhbnQlM0QlMjJmcDE2JTIyJTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSkudG8oJTIyY3VkYSUyMik=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoZeroSDXLPipeline | |
| model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span> | |
| pipe = TextToVideoZeroSDXLPipeline.from_pretrained( | |
| model_id, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>)`,wrap:!1}}),de=new M({props:{title:"Text-To-Video with Pose Control",local:"text-to-video-with-pose-control",headingTag:"h3"}}),me=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBvc2VzX3NrZWxldG9uX2dpZnMlMkZkYW5jZTFfY29yci5tcDQlMjIlMEFyZXBvX2lkJTIwJTNEJTIwJTIyUEFJUiUyRlRleHQyVmlkZW8tWmVybyUyMiUwQXZpZGVvX3BhdGglMjAlM0QlMjBoZl9odWJfZG93bmxvYWQocmVwb190eXBlJTNEJTIyc3BhY2UlMjIlMkMlMjByZXBvX2lkJTNEcmVwb19pZCUyQyUyMGZpbGVuYW1lJTNEZmlsZW5hbWUp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/poses_skeleton_gifs/dance1_corr.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),fe=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXBvc2VfaW1hZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| pose_images = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),ue=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTBBY29udHJvbG5ldCUyMCUzRCUyMENvbnRyb2xOZXRNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIybGxseWFzdmllbCUyRnNkLWNvbnRyb2xuZXQtb3BlbnBvc2UlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwbW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJTIyY3VkYSUyMiklMEElMEElMjMlMjBTZXQlMjB0aGUlMjBhdHRlbnRpb24lMjBwcm9jZXNzb3IlMEFwaXBlLnVuZXQuc2V0X2F0dG5fcHJvY2Vzc29yKENyb3NzRnJhbWVBdHRuUHJvY2Vzc29yKGJhdGNoX3NpemUlM0QyKSklMEFwaXBlLmNvbnRyb2xuZXQuc2V0X2F0dG5fcHJvY2Vzc29yKENyb3NzRnJhbWVBdHRuUHJvY2Vzc29yKGJhdGNoX3NpemUlM0QyKSklMEElMEElMjMlMjBmaXglMjBsYXRlbnRzJTIwZm9yJTIwYWxsJTIwZnJhbWVzJTBBbGF0ZW50cyUyMCUzRCUyMHRvcmNoLnJhbmRuKCgxJTJDJTIwNCUyQyUyMDY0JTJDJTIwNjQpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNikucmVwZWF0KGxlbihwb3NlX2ltYWdlcyklMkMlMjAxJTJDJTIwMSUyQyUyMDEpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyRGFydGglMjBWYWRlciUyMGRhbmNpbmclMjBpbiUyMGElMjBkZXNlcnQlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4ocG9zZV9pbWFnZXMpJTJDJTIwaW1hZ2UlM0Rwb3NlX2ltYWdlcyUyQyUyMGxhdGVudHMlM0RsYXRlbnRzKS5pbWFnZXMlMEFpbWFnZWlvLm1pbXNhdmUoJTIydmlkZW8ubXA0JTIyJTJDJTIwcmVzdWx0JTJDJTIwZnBzJTNENCk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-openpose"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),_e=new M({props:{title:"SDXL Support",local:"sdxl-support",headingTag:"h4"}}),ge=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uWExDb250cm9sTmV0UGlwZWxpbmUlMkMlMjBDb250cm9sTmV0TW9kZWwlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQWNvbnRyb2xuZXRfbW9kZWxfaWQlMjAlM0QlMjAndGhpYmF1ZCUyRmNvbnRyb2xuZXQtb3BlbnBvc2Utc2R4bC0xLjAnJTBBbW9kZWxfaWQlMjAlM0QlMjAnc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJyUwQSUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGNvbnRyb2xuZXRfbW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvbkNvbnRyb2xOZXRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTA5bW9kZWxfaWQlMkMlMjBjb250cm9sbmV0JTNEY29udHJvbG5ldCUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUwQSkudG8oJ2N1ZGEnKSUwQSUwQSUyMyUyMFNldCUyMHRoZSUyMGF0dGVudGlvbiUyMHByb2Nlc3NvciUwQXBpcGUudW5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQXBpcGUuY29udHJvbG5ldC5zZXRfYXR0bl9wcm9jZXNzb3IoQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IoYmF0Y2hfc2l6ZSUzRDIpKSUwQSUwQSUyMyUyMGZpeCUyMGxhdGVudHMlMjBmb3IlMjBhbGwlMjBmcmFtZXMlMEFsYXRlbnRzJTIwJTNEJTIwdG9yY2gucmFuZG4oKDElMkMlMjA0JTJDJTIwMTI4JTJDJTIwMTI4KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4ocG9zZV9pbWFnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkRhcnRoJTIwVmFkZXIlMjBkYW5jaW5nJTIwaW4lMjBhJTIwZGVzZXJ0JTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKHBvc2VfaW1hZ2VzKSUyQyUyMGltYWdlJTNEcG9zZV9pbWFnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| controlnet_model_id = <span class="hljs-string">'thibaud/controlnet-openpose-sdxl-1.0'</span> | |
| model_id = <span class="hljs-string">'stabilityai/stable-diffusion-xl-base-1.0'</span> | |
| controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16) | |
| pipe = StableDiffusionXLControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">'cuda'</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(pose_images), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"Darth Vader dancing in a desert"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(pose_images), image=pose_images, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),be=new M({props:{title:"Text-To-Video with Edge Control",local:"text-to-video-with-edge-control",headingTag:"h3"}}),ve=new M({props:{title:"Video Instruct-Pix2Pix",local:"video-instruct-pix2pix",headingTag:"h3"}}),we=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRnBpeDJwaXglMjB2aWRlbyUyRmNhbWVsLm1wNCUyMiUwQXJlcG9faWQlMjAlM0QlMjAlMjJQQUlSJTJGVGV4dDJWaWRlby1aZXJvJTIyJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGhmX2h1Yl9kb3dubG9hZChyZXBvX3R5cGUlM0QlMjJzcGFjZSUyMiUyQyUyMHJlcG9faWQlM0RyZXBvX2lkJTJDJTIwZmlsZW5hbWUlM0RmaWxlbmFtZSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/pix2pix video/camel.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Ze=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQXZpZGVvJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| video = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),je=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uSW5zdHJ1Y3RQaXgyUGl4UGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnBpcGVsaW5lcy50ZXh0X3RvX3ZpZGVvX3N5bnRoZXNpcy5waXBlbGluZV90ZXh0X3RvX3ZpZGVvX3plcm8lMjBpbXBvcnQlMjBDcm9zc0ZyYW1lQXR0blByb2Nlc3NvciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIydGltYnJvb2tzJTJGaW5zdHJ1Y3QtcGl4MnBpeCUyMiUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25JbnN0cnVjdFBpeDJQaXhQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnRvKCUyMmN1ZGElMjIpJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMykpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIybWFrZSUyMGl0JTIwVmFuJTIwR29naCUyMFN0YXJyeSUyME5pZ2h0JTIwc3R5bGUlMjIlMEFyZXN1bHQlMjAlM0QlMjBwaXBlKHByb21wdCUzRCU1QnByb21wdCU1RCUyMColMjBsZW4odmlkZW8pJTJDJTIwaW1hZ2UlM0R2aWRlbykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMmVkaXRlZF92aWRlby5tcDQlMjIlMkMlMjByZXN1bHQlMkMlMjBmcHMlM0Q0KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| model_id = <span class="hljs-string">"timbrooks/instruct-pix2pix"</span> | |
| pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
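<span class="hljs-comment"># batch_size=3 because InstructPix2Pix guidance runs three branches per frame (text-conditioned, image-conditioned, unconditional)</span>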
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">3</span>)) | |
| prompt = <span class="hljs-string">"make it Van Gogh Starry Night style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(video), image=video).images | |
| imageio.mimsave(<span class="hljs-string">"edited_video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),Ue=new M({props:{title:"DreamBooth specialization",local:"dreambooth-specialization",headingTag:"h3"}}),Ie=new w({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQSUwQWZpbGVuYW1lJTIwJTNEJTIwJTIyX19hc3NldHNfXyUyRmNhbm55X3ZpZGVvc19tcDQlMkZnaXJsX3R1cm5pbmcubXA0JTIyJTBBcmVwb19pZCUyMCUzRCUyMCUyMlBBSVIlMkZUZXh0MlZpZGVvLVplcm8lMjIlMEF2aWRlb19wYXRoJTIwJTNEJTIwaGZfaHViX2Rvd25sb2FkKHJlcG9fdHlwZSUzRCUyMnNwYWNlJTIyJTJDJTIwcmVwb19pZCUzRHJlcG9faWQlMkMlMjBmaWxlbmFtZSUzRGZpbGVuYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download | |
| filename = <span class="hljs-string">"__assets__/canny_videos_mp4/girl_turning.mp4"</span> | |
| repo_id = <span class="hljs-string">"PAIR/Text2Video-Zero"</span> | |
| video_path = hf_hub_download(repo_type=<span class="hljs-string">"space"</span>, repo_id=repo_id, filename=filename)`,wrap:!1}}),Xe=new w({props:{code:"ZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBaW1wb3J0JTIwaW1hZ2VpbyUwQSUwQXJlYWRlciUyMCUzRCUyMGltYWdlaW8uZ2V0X3JlYWRlcih2aWRlb19wYXRoJTJDJTIwJTIyZmZtcGVnJTIyKSUwQWZyYW1lX2NvdW50JTIwJTNEJTIwOCUwQWNhbm55X2VkZ2VzJTIwJTNEJTIwJTVCSW1hZ2UuZnJvbWFycmF5KHJlYWRlci5nZXRfZGF0YShpKSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShmcmFtZV9jb3VudCklNUQ=",highlighted:`<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> imageio | |
| reader = imageio.get_reader(video_path, <span class="hljs-string">"ffmpeg"</span>) | |
| frame_count = <span class="hljs-number">8</span> | |
| canny_edges = [Image.fromarray(reader.get_data(i)) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(frame_count)]`,wrap:!1}}),$e=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uQ29udHJvbE5ldFBpcGVsaW5lJTJDJTIwQ29udHJvbE5ldE1vZGVsJTBBZnJvbSUyMGRpZmZ1c2Vycy5waXBlbGluZXMudGV4dF90b192aWRlb19zeW50aGVzaXMucGlwZWxpbmVfdGV4dF90b192aWRlb196ZXJvJTIwaW1wb3J0JTIwQ3Jvc3NGcmFtZUF0dG5Qcm9jZXNzb3IlMEElMEElMjMlMjBzZXQlMjBtb2RlbCUyMGlkJTIwdG8lMjBjdXN0b20lMjBtb2RlbCUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyUEFJUiUyRnRleHQydmlkZW8temVyby1jb250cm9sbmV0LWNhbm55LWF2YXRhciUyMiUwQWNvbnRyb2xuZXQlMjAlM0QlMjBDb250cm9sTmV0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmxsbHlhc3ZpZWwlMkZzZC1jb250cm9sbmV0LWNhbm55JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25Db250cm9sTmV0UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTIwY29udHJvbG5ldCUzRGNvbnRyb2xuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwU2V0JTIwdGhlJTIwYXR0ZW50aW9uJTIwcHJvY2Vzc29yJTBBcGlwZS51bmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBcGlwZS5jb250cm9sbmV0LnNldF9hdHRuX3Byb2Nlc3NvcihDcm9zc0ZyYW1lQXR0blByb2Nlc3NvcihiYXRjaF9zaXplJTNEMikpJTBBJTBBJTIzJTIwZml4JTIwbGF0ZW50cyUyMGZvciUyMGFsbCUyMGZyYW1lcyUwQWxhdGVudHMlMjAlM0QlMjB0b3JjaC5yYW5kbigoMSUyQyUyMDQlMkMlMjA2NCUyQyUyMDY0KSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpLnJlcGVhdChsZW4oY2FubnlfZWRnZXMpJTJDJTIwMSUyQyUyMDElMkMlMjAxKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMm9pbCUyMHBhaW50aW5nJTIwb2YlMjBhJTIwYmVhdXRpZnVsJTIwZ2lybCUyMGF2YXRhciUyMHN0eWxlJTIyJTBBcmVzdWx0JTIwJTNEJTIwcGlwZShwcm9tcHQlM0QlNUJwcm9tcHQlNUQlMjAqJTIwbGVuKGNhbm55X2VkZ2VzKSUyQyUyMGltYWdlJTNEY2FubnlfZWRnZXMlMkMlMjBsYXRlbnRzJTNEbGF0ZW50cykuaW1hZ2VzJTBBaW1hZ2Vpby5taW1zYXZlKCUyMnZpZGVvLm1wNCUyMiUyQyUyMHJlc3VsdCUyQyUyMGZwcyUzRDQp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel | |
| <span class="hljs-keyword">from</span> diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero <span class="hljs-keyword">import</span> CrossFrameAttnProcessor | |
| <span class="hljs-comment"># set model id to custom model</span> | |
| model_id = <span class="hljs-string">"PAIR/text2video-zero-controlnet-canny-avatar"</span> | |
| controlnet = ControlNetModel.from_pretrained(<span class="hljs-string">"lllyasviel/sd-controlnet-canny"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| model_id, controlnet=controlnet, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Set the attention processor</span> | |
| pipe.unet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| pipe.controlnet.set_attn_processor(CrossFrameAttnProcessor(batch_size=<span class="hljs-number">2</span>)) | |
| <span class="hljs-comment"># fix latents for all frames</span> | |
| latents = torch.randn((<span class="hljs-number">1</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16).repeat(<span class="hljs-built_in">len</span>(canny_edges), <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| prompt = <span class="hljs-string">"oil painting of a beautiful girl avatar style"</span> | |
| result = pipe(prompt=[prompt] * <span class="hljs-built_in">len</span>(canny_edges), image=canny_edges, latents=latents).images | |
| imageio.mimsave(<span class="hljs-string">"video.mp4"</span>, result, fps=<span class="hljs-number">4</span>)`,wrap:!1}}),W=new js({props:{$$slots:{default:[ks]},$$scope:{ctx:Wt}}}),Ge=new M({props:{title:"TextToVideoZeroPipeline",local:"diffusers.TextToVideoZeroPipeline",headingTag:"h2"}}),Se=new J({props:{name:"class diffusers.TextToVideoZeroPipeline",anchor:"diffusers.TextToVideoZeroPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"safety_checker",val:": StableDiffusionSafetyChecker"},{name:"feature_extractor",val:": CLIPImageProcessor"},{name:"requires_safety_checker",val:": bool = True"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/main/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — | |
| A <a href="/docs/diffusers/main/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoZeroPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/main/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"},{anchor:"diffusers.TextToVideoZeroPipeline.safety_checker",description:`<strong>safety_checker</strong> (<code>StableDiffusionSafetyChecker</code>) — | |
| Classification module that estimates whether generated images could be considered offensive or harmful. | |
| Please refer to the <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow">model card</a> for more details | |
| about a model’s potential harms.`,name:"safety_checker"},{anchor:"diffusers.TextToVideoZeroPipeline.feature_extractor",description:`<strong>feature_extractor</strong> (<code>CLIPImageProcessor</code>) — | |
| A <code>CLIPImageProcessor</code> to extract features from generated images; used as inputs to the <code>safety_checker</code>.`,name:"feature_extractor"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L284"}}),Ne=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroPipeline.__call__",parameters:[{name:"prompt",val:": Union"},{name:"video_length",val:": Optional = 8"},{name:"height",val:": Optional = None"},{name:"width",val:": Optional = None"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": Union = None"},{name:"num_videos_per_prompt",val:": Optional = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": Union = None"},{name:"latents",val:": Optional = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": Optional = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": Optional = None"},{name:"callback_steps",val:": Optional = 1"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"},{name:"frame_ids",val:": Optional = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what not to include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>"latent"</code> and <code>"np"</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a | |
| <a href="/docs/diffusers/main/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput">TextToVideoPipelineOutput</a> instead of | |
| a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function called every <code>callback_steps</code> steps during inference, with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"},{anchor:"diffusers.TextToVideoZeroPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L521",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The output contains an <code>ndarray</code> of the generated video, when <code>output_type</code> != <code>"latent"</code>, otherwise a | |
| latent code of generated videos and a list of <code>bool</code>s indicating whether the corresponding generated | |
| video contains “not-safe-for-work” (nsfw) content.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/main/en/api/pipelines/text_to_video_zero#diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput" | |
| >TextToVideoPipelineOutput</a></p> | |
| `}}),ze=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"cross_attention_kwargs",val:" = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function called every <code>callback_steps</code> steps during inference, with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step. | |
| extra_step_kwargs — | |
| Extra keyword arguments passed to the scheduler step. | |
| cross_attention_kwargs — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>. | |
| num_warmup_steps — | |
| number of warmup steps.`,name:"callback_steps"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L370",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Latents of backward process output at time timesteps[-1].</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Be=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": Optional = None"},{name:"negative_prompt_embeds",val:": Optional = None"},{name:"lora_scale",val:": Optional = None"},{name:"clip_skip",val:": Optional = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded | |
| device — (<code>torch.device</code>): | |
| torch device`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L793"}}),Re=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L346",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),Le=new M({props:{title:"TextToVideoZeroSDXLPipeline",local:"diffusers.TextToVideoZeroSDXLPipeline",headingTag:"h2"}}),De=new J({props:{name:"class diffusers.TextToVideoZeroSDXLPipeline",anchor:"diffusers.TextToVideoZeroSDXLPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"text_encoder_2",val:": CLIPTextModelWithProjection"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"tokenizer_2",val:": CLIPTokenizer"},{name:"unet",val:": UNet2DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"},{name:"image_encoder",val:": CLIPVisionModelWithProjection = None"},{name:"feature_extractor",val:": CLIPImageProcessor = None"},{name:"force_zeros_for_empty_prompt",val:": bool = True"},{name:"add_watermarker",val:": Optional = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/main/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder. Stable Diffusion XL uses the text portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel" rel="nofollow">CLIP</a>, specifically | |
| the <a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a> variant.`,name:"text_encoder"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.text_encoder_2",description:`<strong>text_encoder_2</strong> (<code> CLIPTextModelWithProjection</code>) — | |
| Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of | |
| <a href="https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection" rel="nofollow">CLIP</a>, | |
| specifically the | |
| <a href="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" rel="nofollow">laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</a> | |
| variant.`,name:"text_encoder_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.tokenizer_2",description:`<strong>tokenizer_2</strong> (<code>CLIPTokenizer</code>) — | |
| Second Tokenizer of class | |
| <a href="https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a>.`,name:"tokenizer_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.unet",description:'<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet2d-cond#diffusers.UNet2DConditionModel">UNet2DConditionModel</a>) — Conditional U-Net architecture to denoise the encoded image latents.',name:"unet"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/main/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/main/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L326"}}),Qe=new J({props:{name:"__call__",anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__",parameters:[{name:"prompt",val:": Union"},{name:"prompt_2",val:": Union = None"},{name:"video_length",val:": Optional = 8"},{name:"height",val:": Optional = None"},{name:"width",val:": Optional = None"},{name:"num_inference_steps",val:": int = 50"},{name:"denoising_end",val:": Optional = None"},{name:"guidance_scale",val:": float = 7.5"},{name:"negative_prompt",val:": Union = None"},{name:"negative_prompt_2",val:": Union = None"},{name:"num_videos_per_prompt",val:": Optional = 1"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": Union = None"},{name:"frame_ids",val:": Optional = None"},{name:"prompt_embeds",val:": Optional = None"},{name:"negative_prompt_embeds",val:": Optional = None"},{name:"pooled_prompt_embeds",val:": Optional = None"},{name:"negative_pooled_prompt_embeds",val:": Optional = None"},{name:"latents",val:": Optional = None"},{name:"motion_field_strength_x",val:": float = 12"},{name:"motion_field_strength_y",val:": float = 12"},{name:"output_type",val:": Optional = 'tensor'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": Optional = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": Optional = None"},{name:"guidance_rescale",val:": float = 0.0"},{name:"original_size",val:": Optional = None"},{name:"crops_coords_top_left",val:": Tuple = (0, 0)"},{name:"target_size",val:": Optional = None"},{name:"t0",val:": int = 44"},{name:"t1",val:": int = 47"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code> | |
| instead.`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.video_length",description:`<strong>video_length</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| The number of generated video frames.`,name:"video_length"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to self.unet.config.sample_size * self.vae_scale_factor) — | |
| The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.denoising_end",description:`<strong>denoising_end</strong> (<code>float</code>, <em>optional</em>) — | |
| When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be | |
| completed before it is intentionally prematurely terminated. As a result, the returned sample will | |
| still retain a substantial amount of noise as determined by the discrete timesteps selected by the | |
| scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a | |
| “Mixture of Denoisers” multi-pipeline setup, as elaborated in <a href="https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output" rel="nofollow"><strong>Refining the Image | |
| Output</strong></a>`,name:"denoising_end"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 7.5) — | |
| Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>. | |
| <code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen | |
| Paper</a>. Guidance scale is enabled by setting <code>guidance_scale > 1</code>. Higher guidance scale encourages the model to generate images that are closely linked to the text <code>prompt</code>, | |
| usually at the expense of lower image quality.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of videos to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) in the DDIM paper: <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">https://arxiv.org/abs/2010.02502</a>. Only applies to | |
| <a href="/docs/diffusers/main/en/api/schedulers/ddim#diffusers.DDIMScheduler">schedulers.DDIMScheduler</a> and is ignored for other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| One or a list of <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow">torch generator(s)</a> | |
| to make generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.frame_ids",description:`<strong>frame_ids</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Indexes of the frames that are being generated. This is used when generating longer videos | |
| chunk-by-chunk.`,name:"frame_ids"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor will be generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_x",description:`<strong>motion_field_strength_x</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along x-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_x"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.motion_field_strength_y",description:`<strong>motion_field_strength_y</strong> (<code>float</code>, <em>optional</em>, defaults to 12) — | |
| Strength of motion in generated video along y-axis. See the <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, | |
| Sect. 3.3.1.`,name:"motion_field_strength_y"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"tensor"</code>) — | |
| The output format of the generated image. Choose between | |
| <a href="https://pillow.readthedocs.io/en/stable/" rel="nofollow">PIL</a>: <code>PIL.Image.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <code>~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput</code> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that will be called every <code>callback_steps</code> steps during inference. The function will be | |
| called with the following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function will be called. If not specified, the callback will be | |
| called at every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined under | |
| <code>self.processor</code> in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py" rel="nofollow">diffusers.cross_attention</a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.guidance_rescale",description:`<strong>guidance_rescale</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Guidance rescale factor proposed by <a href="https://arxiv.org/pdf/2305.08891.pdf" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are | |
| Flawed</a>. <code>guidance_rescale</code> is defined as <code>φ</code> in equation 16 of | |
| <a href="https://arxiv.org/pdf/2305.08891.pdf" rel="nofollow">Common Diffusion Noise Schedules and Sample Steps are Flawed</a>. | |
| Guidance rescale factor should fix overexposure when using zero terminal SNR.`,name:"guidance_rescale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.original_size",description:`<strong>original_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| If <code>original_size</code> is not the same as <code>target_size</code> the image will appear to be down- or upsampled. | |
| <code>original_size</code> defaults to <code>(width, height)</code> if not specified. Part of SDXL’s micro-conditioning as | |
| explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"original_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.crops_coords_top_left",description:`<strong>crops_coords_top_left</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (0, 0)) — | |
| <code>crops_coords_top_left</code> can be used to generate an image that appears to be “cropped” from the position | |
| <code>crops_coords_top_left</code> downwards. Favorable, well-centered images are usually achieved by setting | |
| <code>crops_coords_top_left</code> to (0, 0). Part of SDXL’s micro-conditioning as explained in section 2.2 of | |
| <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"crops_coords_top_left"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.target_size",description:`<strong>target_size</strong> (<code>Tuple[int]</code>, <em>optional</em>, defaults to (1024, 1024)) — | |
| For most cases, <code>target_size</code> should be set to the desired height and width of the generated image. If | |
| not specified it will default to <code>(width, height)</code>. Part of SDXL’s micro-conditioning as explained in | |
| section 2.2 of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">https://huggingface.co/papers/2307.01952</a>.`,name:"target_size"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t0",description:`<strong>t0</strong> (<code>int</code>, <em>optional</em>, defaults to 44) — | |
| Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t0"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.__call__.t1",description:`<strong>t1</strong> (<code>int</code>, <em>optional</em>, defaults to 47) — | |
| Timestep t1. Should be in the range [t0 + 1, num_inference_steps - 1]. See the | |
| <a href="https://arxiv.org/abs/2303.13439" rel="nofollow">paper</a>, Sect. 3.3.1.`,name:"t1"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L915",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> or | |
| <code>tuple</code>: <code>~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput</code> | |
| if <code>return_dict</code> is True, otherwise a <code>tuple</code>. When returning a tuple, the first element is a list with the | |
| generated images.</p> | |
| `}}),Ee=new J({props:{name:"backward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop",parameters:[{name:"latents",val:""},{name:"timesteps",val:""},{name:"prompt_embeds",val:""},{name:"guidance_scale",val:""},{name:"callback",val:""},{name:"callback_steps",val:""},{name:"num_warmup_steps",val:""},{name:"extra_step_kwargs",val:""},{name:"add_text_embeds",val:""},{name:"add_time_ids",val:""},{name:"cross_attention_kwargs",val:" = None"},{name:"guidance_rescale",val:": float = 0.0"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that is called every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.backward_loop.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step. | |
| extra_step_kwargs — | |
| Extra keyword arguments passed to the scheduler’s <code>step</code> method. | |
| cross_attention_kwargs — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>. | |
| num_warmup_steps — | |
| number of warmup steps.`,name:"callback_steps"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L830",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents of backward process output at time timesteps[-1]</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>latents</p> | |
| `}}),Ye=new J({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt",parameters:[{name:"prompt",val:": str"},{name:"prompt_2",val:": Optional = None"},{name:"device",val:": Optional = None"},{name:"num_images_per_prompt",val:": int = 1"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"negative_prompt",val:": Optional = None"},{name:"negative_prompt_2",val:": Optional = None"},{name:"prompt_embeds",val:": Optional = None"},{name:"negative_prompt_embeds",val:": Optional = None"},{name:"pooled_prompt_embeds",val:": Optional = None"},{name:"negative_pooled_prompt_embeds",val:": Optional = None"},{name:"lora_scale",val:": Optional = None"},{name:"clip_skip",val:": Optional = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_2",description:`<strong>prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to be sent to the <code>tokenizer_2</code> and <code>text_encoder_2</code>. If not defined, <code>prompt</code> is | |
| used in both text-encoders | |
| device — (<code>torch.device</code>): | |
| torch device`,name:"prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_2",description:`<strong>negative_prompt_2</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation to be sent to <code>tokenizer_2</code> and | |
| <code>text_encoder_2</code>. If not defined, <code>negative_prompt</code> is used in both text-encoders`,name:"negative_prompt_2"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.pooled_prompt_embeds",description:`<strong>pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. | |
| If not provided, pooled text embeddings will be generated from <code>prompt</code> input argument.`,name:"pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.negative_pooled_prompt_embeds",description:`<strong>negative_pooled_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, pooled negative_prompt_embeds will be generated from <code>negative_prompt</code> | |
| input argument.`,name:"negative_pooled_prompt_embeds"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoZeroSDXLPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L571"}}),Fe=new J({props:{name:"forward_loop",anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop",parameters:[{name:"x_t0",val:""},{name:"t0",val:""},{name:"t1",val:""},{name:"generator",val:""}],parametersDescription:[{anchor:"diffusers.TextToVideoZeroSDXLPipeline.forward_loop.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py#L806",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Forward process applied to x_t0 from time t0 to t1.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>x_t1</p> | |
| `}}),He=new M({props:{title:"TextToVideoPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",headingTag:"h2"}}),Ae=new J({props:{name:"class diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput",parameters:[{name:"images",val:": Union"},{name:"nsfw_content_detected",val:": Optional"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.images",description:`<strong>images</strong> (<code>[List[PIL.Image.Image]</code>, <code>np.ndarray</code>]) — | |
| List of denoised PIL images of length <code>batch_size</code> or NumPy array of shape <code>(batch_size, height, width, num_channels)</code>.`,name:"images"},{anchor:"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput.nsfw_content_detected",description:`<strong>nsfw_content_detected</strong> (<code>[List[bool]]</code>) — | |
| List indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content or | |
| <code>None</code> if safety checking could not be performed.`,name:"nsfw_content_detected"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py#L182"}}),qe=new Us({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video_zero.md"}}),{c(){T=i("meta"),L=s(),Z=i("p"),Oe=s(),p(D.$$.fragment),$t=s(),Q=i("p"),Q.innerHTML=kn,Pt=s(),E=i("p"),E.textContent=Vn,Gt=s(),Y=i("ol"),Y.innerHTML=In,St=s(),F=i("p"),F.textContent=Cn,Nt=s(),H=i("p"),H.innerHTML=Xn,zt=s(),A=i("p"),A.textContent=Wn,Bt=s(),q=i("p"),q.innerHTML=$n,Rt=s(),O=i("p"),O.innerHTML=Pn,Lt=s(),p(K.$$.fragment),Dt=s(),p(ee.$$.fragment),Qt=s(),te=i("p"),te.textContent=Gn,Et=s(),p(oe.$$.fragment),Yt=s(),ne=i("p"),ne.textContent=Sn,Ft=s(),se=i("ul"),se.innerHTML=Nn,Ht=s(),ae=i("p"),ae.textContent=zn,At=s(),p(ie.$$.fragment),qt=s(),Ke=i("ul"),X=i("li"),p(re.$$.fragment),Ro=Ts("In order to use the SDXL model when generating a video from prompt, use the "),et=i("code"),et.textContent=Bn,Lo=Ts(" pipeline:"),Ot=s(),p(le.$$.fragment),Kt=s(),p(de.$$.fragment),eo=s(),pe=i("p"),pe.textContent=Rn,to=s(),tt=i("ol"),ce=i("li"),ot=i("p"),ot.textContent=Ln,Do=s(),p(me.$$.fragment),oo=s(),k=i("ol"),V=i("li"),nt=i("p"),nt.textContent=Dn,Qo=s(),p(fe.$$.fragment),Eo=s(),st=i("p"),st.innerHTML=Qn,Yo=s(),he=i("li"),at=i("p"),at.innerHTML=En,Fo=s(),p(ue.$$.fragment),no=s(),it=i("ul"),I=i("li"),p(_e.$$.fragment),Ho=s(),rt=i("p"),rt.textContent=Yn,Ao=s(),p(ge.$$.fragment),so=s(),p(be.$$.fragment),ao=s(),Te=i("p"),Te.innerHTML=Fn,io=s(),p(ve.$$.fragment),ro=s(),xe=i("p"),xe.innerHTML=Hn,lo=s(),j=i("ol"),ye=i("li"),lt=i("p"),lt.textContent=An,qo=s(),p(we.$$.fragment),Oo=s(),Me=i("li"),dt=i("p"),dt.textContent=qn,Ko=s(),p(Ze.$$.fragment),en=s(),Je=i("li"),pt=i("p"),pt.innerHTML=On,tn=s(),p(je.$$.fragment),po=s(),p(Ue.$$.fragment),co=s(),ke=i("p"),ke.innerHTML=Kn,mo=s(),U=i("ol"),Ve=i("li"),ct=i("p"),ct.textContent=es,on=s(),p(Ie.$$.fragment),nn=s(),Ce=i("li"),mt=i("p"),mt.textContent=ts,sn=s(),p(Xe.$$.fragment),an=s(),We=i("li"),ft=i("p"),ft.innerHTML=os,rn=s(),p($e.$$.fragment),fo=s(),Pe=i("p"),Pe.innerHTML=ns,ho=s(),p(W.$$.fragment),uo=s(),p(Ge.$$.fragment),_o=s(),g=i("div"),p(Se.$$.fragment),ln=s(),ht=i("p"),ht.textContent=ss,dn=s(),ut=i("p"),ut.innerHTML=as,pn=s(),$=i("div"),p(Ne.$$.fragment),cn=s(),_t=i("p"),_t.textContent=is,mn=s(),P=i("div"),p(ze.$$.fragment),fn=s(),gt=i("p"),gt.textContent=rs,hn=s(),G=i("div"),p(Be.$$.fragment),un=s(),bt=i("p"),bt.textContent=ls,_n=s(),S=i("div"),p(Re.$$.fragment),gn=s(),Tt=i("p"),Tt.textContent=ds,go=s(),p(Le.$$.fragment),bo=s(),b=i("div"),p(De.$$.fragment),bn=s(),vt=i("p"),vt.textContent=ps,Tn=s(),xt=i("p"),xt.innerHTML=cs,vn=s(),N=i("div"),p(Qe.$$.fragment),xn=s(),yt=i("p"),yt.textContent=ms,yn=s(),z=i("div"),p(Ee.$$.fragment),wn=s(),wt=i("p"),wt.textContent=fs,Mn=s(),B=i("div"),p(Ye.$$.fragment),Zn=s(),Mt=i("p"),Mt.textContent=hs,Jn=s(),R=i("div"),p(Fe.$$.fragment),jn=s(),Zt=i("p"),Zt.textContent=us,To=s(),p(He.$$.fragment),vo=s(),C=i("div"),p(Ae.$$.fragment),Un=s(),Jt=i("p"),Jt.textContent=_s,xo=s(),p(qe.$$.fragment),yo=s(),It=i("p"),this.h()},l(e){const 
o=Js("svelte-u9bgzb",document.head);T=r(o,"META",{name:!0,content:!0}),o.forEach(t),L=a(e),Z=r(e,"P",{}),_(Z).forEach(t),Oe=a(e),c(D.$$.fragment,e),$t=a(e),Q=r(e,"P",{"data-svelte-h":!0}),d(Q)!=="svelte-16xpzkf"&&(Q.innerHTML=kn),Pt=a(e),E=r(e,"P",{"data-svelte-h":!0}),d(E)!=="svelte-1jaz0ks"&&(E.textContent=Vn),Gt=a(e),Y=r(e,"OL",{"data-svelte-h":!0}),d(Y)!=="svelte-19ca1wn"&&(Y.innerHTML=In),St=a(e),F=r(e,"P",{"data-svelte-h":!0}),d(F)!=="svelte-yhxhyq"&&(F.textContent=Cn),Nt=a(e),H=r(e,"P",{"data-svelte-h":!0}),d(H)!=="svelte-uupw0l"&&(H.innerHTML=Xn),zt=a(e),A=r(e,"P",{"data-svelte-h":!0}),d(A)!=="svelte-1cwsb16"&&(A.textContent=Wn),Bt=a(e),q=r(e,"P",{"data-svelte-h":!0}),d(q)!=="svelte-1rtg145"&&(q.innerHTML=$n),Rt=a(e),O=r(e,"P",{"data-svelte-h":!0}),d(O)!=="svelte-u78olw"&&(O.innerHTML=Pn),Lt=a(e),c(K.$$.fragment,e),Dt=a(e),c(ee.$$.fragment,e),Qt=a(e),te=r(e,"P",{"data-svelte-h":!0}),d(te)!=="svelte-1dpixty"&&(te.textContent=Gn),Et=a(e),c(oe.$$.fragment,e),Yt=a(e),ne=r(e,"P",{"data-svelte-h":!0}),d(ne)!=="svelte-rs2kss"&&(ne.textContent=Sn),Ft=a(e),se=r(e,"UL",{"data-svelte-h":!0}),d(se)!=="svelte-1na1mhs"&&(se.innerHTML=Nn),Ht=a(e),ae=r(e,"P",{"data-svelte-h":!0}),d(ae)!=="svelte-1xdujjh"&&(ae.textContent=zn),At=a(e),c(ie.$$.fragment,e),qt=a(e),Ke=r(e,"UL",{});var Ct=_(Ke);X=r(Ct,"LI",{});var Xt=_(X);c(re.$$.fragment,Xt),Ro=vs(Xt,"In order to use the SDXL model when generating a video from prompt, use the "),et=r(Xt,"CODE",{"data-svelte-h":!0}),d(et)!=="svelte-7o0i0w"&&(et.textContent=Bn),Lo=vs(Xt," pipeline:"),Xt.forEach(t),Ct.forEach(t),Ot=a(e),c(le.$$.fragment,e),Kt=a(e),c(de.$$.fragment,e),eo=a(e),pe=r(e,"P",{"data-svelte-h":!0}),d(pe)!=="svelte-13nlg5v"&&(pe.textContent=Rn),to=a(e),tt=r(e,"OL",{});var gs=_(tt);ce=r(gs,"LI",{});var Mo=_(ce);ot=r(Mo,"P",{"data-svelte-h":!0}),d(ot)!=="svelte-1xo2nq1"&&(ot.textContent=Ln),Do=a(Mo),c(me.$$.fragment,Mo),Mo.forEach(t),gs.forEach(t),oo=a(e),k=r(e,"OL",{start:!0});var Zo=_(k);V=r(Zo,"LI",{});var jt=_(V);nt=r(jt,"P",{"data-svelte-h":!0}),d(nt)!=="svelte-7pm43o"&&(nt.textContent=Dn),Qo=a(jt),c(fe.$$.fragment,jt),Eo=a(jt),st=r(jt,"P",{"data-svelte-h":!0}),d(st)!=="svelte-mij11h"&&(st.innerHTML=Qn),jt.forEach(t),Yo=a(Zo),he=r(Zo,"LI",{});var Jo=_(he);at=r(Jo,"P",{"data-svelte-h":!0}),d(at)!=="svelte-1wsv5bt"&&(at.innerHTML=En),Fo=a(Jo),c(ue.$$.fragment,Jo),Jo.forEach(t),Zo.forEach(t),no=a(e),it=r(e,"UL",{});var bs=_(it);I=r(bs,"LI",{});var Ut=_(I);c(_e.$$.fragment,Ut),Ho=a(Ut),rt=r(Ut,"P",{"data-svelte-h":!0}),d(rt)!=="svelte-bcy3mo"&&(rt.textContent=Yn),Ao=a(Ut),c(ge.$$.fragment,Ut),Ut.forEach(t),bs.forEach(t),so=a(e),c(be.$$.fragment,e),ao=a(e),Te=r(e,"P",{"data-svelte-h":!0}),d(Te)!=="svelte-1fgx3w6"&&(Te.innerHTML=Fn),io=a(e),c(ve.$$.fragment,e),ro=a(e),xe=r(e,"P",{"data-svelte-h":!0}),d(xe)!=="svelte-o0vmiz"&&(xe.innerHTML=Hn),lo=a(e),j=r(e,"OL",{});var kt=_(j);ye=r(kt,"LI",{});var jo=_(ye);lt=r(jo,"P",{"data-svelte-h":!0}),d(lt)!=="svelte-1xo2nq1"&&(lt.textContent=An),qo=a(jo),c(we.$$.fragment,jo),jo.forEach(t),Oo=a(kt),Me=r(kt,"LI",{});var Uo=_(Me);dt=r(Uo,"P",{"data-svelte-h":!0}),d(dt)!=="svelte-1wln6n0"&&(dt.textContent=qn),Ko=a(Uo),c(Ze.$$.fragment,Uo),Uo.forEach(t),en=a(kt),Je=r(kt,"LI",{});var ko=_(Je);pt=r(ko,"P",{"data-svelte-h":!0}),d(pt)!=="svelte-14jy04z"&&(pt.innerHTML=On),tn=a(ko),c(je.$$.fragment,ko),ko.forEach(t),kt.forEach(t),po=a(e),c(Ue.$$.fragment,e),co=a(e),ke=r(e,"P",{"data-svelte-h":!0}),d(ke)!=="svelte-x1pkth"&&(ke.innerHTML=Kn),mo=a(e),U=r(e,"OL",{});var Vt=_(U);Ve=r(Vt,"LI",{});var 
Vo=_(Ve);ct=r(Vo,"P",{"data-svelte-h":!0}),d(ct)!=="svelte-1xo2nq1"&&(ct.textContent=es),on=a(Vo),c(Ie.$$.fragment,Vo),Vo.forEach(t),nn=a(Vt),Ce=r(Vt,"LI",{});var Io=_(Ce);mt=r(Io,"P",{"data-svelte-h":!0}),d(mt)!=="svelte-1wln6n0"&&(mt.textContent=ts),sn=a(Io),c(Xe.$$.fragment,Io),Io.forEach(t),an=a(Vt),We=r(Vt,"LI",{});var Co=_(We);ft=r(Co,"P",{"data-svelte-h":!0}),d(ft)!=="svelte-okpmrc"&&(ft.innerHTML=os),rn=a(Co),c($e.$$.fragment,Co),Co.forEach(t),Vt.forEach(t),fo=a(e),Pe=r(e,"P",{"data-svelte-h":!0}),d(Pe)!=="svelte-18tnclt"&&(Pe.innerHTML=ns),ho=a(e),c(W.$$.fragment,e),uo=a(e),c(Ge.$$.fragment,e),_o=a(e),g=r(e,"DIV",{class:!0});var v=_(g);c(Se.$$.fragment,v),ln=a(v),ht=r(v,"P",{"data-svelte-h":!0}),d(ht)!=="svelte-1q57293"&&(ht.textContent=ss),dn=a(v),ut=r(v,"P",{"data-svelte-h":!0}),d(ut)!=="svelte-496sm0"&&(ut.innerHTML=as),pn=a(v),$=r(v,"DIV",{class:!0});var Xo=_($);c(Ne.$$.fragment,Xo),cn=a(Xo),_t=r(Xo,"P",{"data-svelte-h":!0}),d(_t)!=="svelte-50j04k"&&(_t.textContent=is),Xo.forEach(t),mn=a(v),P=r(v,"DIV",{class:!0});var Wo=_(P);c(ze.$$.fragment,Wo),fn=a(Wo),gt=r(Wo,"P",{"data-svelte-h":!0}),d(gt)!=="svelte-1cxzr1t"&&(gt.textContent=rs),Wo.forEach(t),hn=a(v),G=r(v,"DIV",{class:!0});var $o=_(G);c(Be.$$.fragment,$o),un=a($o),bt=r($o,"P",{"data-svelte-h":!0}),d(bt)!=="svelte-16q0ax1"&&(bt.textContent=ls),$o.forEach(t),_n=a(v),S=r(v,"DIV",{class:!0});var Po=_(S);c(Re.$$.fragment,Po),gn=a(Po),Tt=r(Po,"P",{"data-svelte-h":!0}),d(Tt)!=="svelte-1d8vbe1"&&(Tt.textContent=ds),Po.forEach(t),v.forEach(t),go=a(e),c(Le.$$.fragment,e),bo=a(e),b=r(e,"DIV",{class:!0});var x=_(b);c(De.$$.fragment,x),bn=a(x),vt=r(x,"P",{"data-svelte-h":!0}),d(vt)!=="svelte-pyonrv"&&(vt.textContent=ps),Tn=a(x),xt=r(x,"P",{"data-svelte-h":!0}),d(xt)!=="svelte-496sm0"&&(xt.innerHTML=cs),vn=a(x),N=r(x,"DIV",{class:!0});var Go=_(N);c(Qe.$$.fragment,Go),xn=a(Go),yt=r(Go,"P",{"data-svelte-h":!0}),d(yt)!=="svelte-v78lg8"&&(yt.textContent=ms),Go.forEach(t),yn=a(x),z=r(x,"DIV",{class:!0});var So=_(z);c(Ee.$$.fragment,So),wn=a(So),wt=r(So,"P",{"data-svelte-h":!0}),d(wt)!=="svelte-jp6j47"&&(wt.textContent=fs),So.forEach(t),Mn=a(x),B=r(x,"DIV",{class:!0});var No=_(B);c(Ye.$$.fragment,No),Zn=a(No),Mt=r(No,"P",{"data-svelte-h":!0}),d(Mt)!=="svelte-16q0ax1"&&(Mt.textContent=hs),No.forEach(t),Jn=a(x),R=r(x,"DIV",{class:!0});var zo=_(R);c(Fe.$$.fragment,zo),jn=a(zo),Zt=r(zo,"P",{"data-svelte-h":!0}),d(Zt)!=="svelte-1d8vbe1"&&(Zt.textContent=us),zo.forEach(t),x.forEach(t),To=a(e),c(He.$$.fragment,e),vo=a(e),C=r(e,"DIV",{class:!0});var Bo=_(C);c(Ae.$$.fragment,Bo),Un=a(Bo),Jt=r(Bo,"P",{"data-svelte-h":!0}),d(Jt)!=="svelte-1dgz4ei"&&(Jt.textContent=_s),Bo.forEach(t),xo=a(e),c(qe.$$.fragment,e),yo=a(e),It=r(e,"P",{}),_(It).forEach(t),this.h()},h(){y(T,"name","hf:doc:metadata"),y(T,"content",Is),y(k,"start","2"),y($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(g,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(B,"class","docstring border-l-2 border-t-2 
pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){n(document.head,T),l(e,L,o),l(e,Z,o),l(e,Oe,o),m(D,e,o),l(e,$t,o),l(e,Q,o),l(e,Pt,o),l(e,E,o),l(e,Gt,o),l(e,Y,o),l(e,St,o),l(e,F,o),l(e,Nt,o),l(e,H,o),l(e,zt,o),l(e,A,o),l(e,Bt,o),l(e,q,o),l(e,Rt,o),l(e,O,o),l(e,Lt,o),m(K,e,o),l(e,Dt,o),m(ee,e,o),l(e,Qt,o),l(e,te,o),l(e,Et,o),m(oe,e,o),l(e,Yt,o),l(e,ne,o),l(e,Ft,o),l(e,se,o),l(e,Ht,o),l(e,ae,o),l(e,At,o),m(ie,e,o),l(e,qt,o),l(e,Ke,o),n(Ke,X),m(re,X,null),n(X,Ro),n(X,et),n(X,Lo),l(e,Ot,o),m(le,e,o),l(e,Kt,o),m(de,e,o),l(e,eo,o),l(e,pe,o),l(e,to,o),l(e,tt,o),n(tt,ce),n(ce,ot),n(ce,Do),m(me,ce,null),l(e,oo,o),l(e,k,o),n(k,V),n(V,nt),n(V,Qo),m(fe,V,null),n(V,Eo),n(V,st),n(k,Yo),n(k,he),n(he,at),n(he,Fo),m(ue,he,null),l(e,no,o),l(e,it,o),n(it,I),m(_e,I,null),n(I,Ho),n(I,rt),n(I,Ao),m(ge,I,null),l(e,so,o),m(be,e,o),l(e,ao,o),l(e,Te,o),l(e,io,o),m(ve,e,o),l(e,ro,o),l(e,xe,o),l(e,lo,o),l(e,j,o),n(j,ye),n(ye,lt),n(ye,qo),m(we,ye,null),n(j,Oo),n(j,Me),n(Me,dt),n(Me,Ko),m(Ze,Me,null),n(j,en),n(j,Je),n(Je,pt),n(Je,tn),m(je,Je,null),l(e,po,o),m(Ue,e,o),l(e,co,o),l(e,ke,o),l(e,mo,o),l(e,U,o),n(U,Ve),n(Ve,ct),n(Ve,on),m(Ie,Ve,null),n(U,nn),n(U,Ce),n(Ce,mt),n(Ce,sn),m(Xe,Ce,null),n(U,an),n(U,We),n(We,ft),n(We,rn),m($e,We,null),l(e,fo,o),l(e,Pe,o),l(e,ho,o),m(W,e,o),l(e,uo,o),m(Ge,e,o),l(e,_o,o),l(e,g,o),m(Se,g,null),n(g,ln),n(g,ht),n(g,dn),n(g,ut),n(g,pn),n(g,$),m(Ne,$,null),n($,cn),n($,_t),n(g,mn),n(g,P),m(ze,P,null),n(P,fn),n(P,gt),n(g,hn),n(g,G),m(Be,G,null),n(G,un),n(G,bt),n(g,_n),n(g,S),m(Re,S,null),n(S,gn),n(S,Tt),l(e,go,o),m(Le,e,o),l(e,bo,o),l(e,b,o),m(De,b,null),n(b,bn),n(b,vt),n(b,Tn),n(b,xt),n(b,vn),n(b,N),m(Qe,N,null),n(N,xn),n(N,yt),n(b,yn),n(b,z),m(Ee,z,null),n(z,wn),n(z,wt),n(b,Mn),n(b,B),m(Ye,B,null),n(B,Zn),n(B,Mt),n(b,Jn),n(b,R),m(Fe,R,null),n(R,jn),n(R,Zt),l(e,To,o),m(He,e,o),l(e,vo,o),l(e,C,o),m(Ae,C,null),n(C,Un),n(C,Jt),l(e,xo,o),m(qe,e,o),l(e,yo,o),l(e,It,o),wo=!0},p(e,[o]){const 
Ct={};o&2&&(Ct.$$scope={dirty:o,ctx:e}),W.$set(Ct)},i(e){wo||(f(D.$$.fragment,e),f(K.$$.fragment,e),f(ee.$$.fragment,e),f(oe.$$.fragment,e),f(ie.$$.fragment,e),f(re.$$.fragment,e),f(le.$$.fragment,e),f(de.$$.fragment,e),f(me.$$.fragment,e),f(fe.$$.fragment,e),f(ue.$$.fragment,e),f(_e.$$.fragment,e),f(ge.$$.fragment,e),f(be.$$.fragment,e),f(ve.$$.fragment,e),f(we.$$.fragment,e),f(Ze.$$.fragment,e),f(je.$$.fragment,e),f(Ue.$$.fragment,e),f(Ie.$$.fragment,e),f(Xe.$$.fragment,e),f($e.$$.fragment,e),f(W.$$.fragment,e),f(Ge.$$.fragment,e),f(Se.$$.fragment,e),f(Ne.$$.fragment,e),f(ze.$$.fragment,e),f(Be.$$.fragment,e),f(Re.$$.fragment,e),f(Le.$$.fragment,e),f(De.$$.fragment,e),f(Qe.$$.fragment,e),f(Ee.$$.fragment,e),f(Ye.$$.fragment,e),f(Fe.$$.fragment,e),f(He.$$.fragment,e),f(Ae.$$.fragment,e),f(qe.$$.fragment,e),wo=!0)},o(e){h(D.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(oe.$$.fragment,e),h(ie.$$.fragment,e),h(re.$$.fragment,e),h(le.$$.fragment,e),h(de.$$.fragment,e),h(me.$$.fragment,e),h(fe.$$.fragment,e),h(ue.$$.fragment,e),h(_e.$$.fragment,e),h(ge.$$.fragment,e),h(be.$$.fragment,e),h(ve.$$.fragment,e),h(we.$$.fragment,e),h(Ze.$$.fragment,e),h(je.$$.fragment,e),h(Ue.$$.fragment,e),h(Ie.$$.fragment,e),h(Xe.$$.fragment,e),h($e.$$.fragment,e),h(W.$$.fragment,e),h(Ge.$$.fragment,e),h(Se.$$.fragment,e),h(Ne.$$.fragment,e),h(ze.$$.fragment,e),h(Be.$$.fragment,e),h(Re.$$.fragment,e),h(Le.$$.fragment,e),h(De.$$.fragment,e),h(Qe.$$.fragment,e),h(Ee.$$.fragment,e),h(Ye.$$.fragment,e),h(Fe.$$.fragment,e),h(He.$$.fragment,e),h(Ae.$$.fragment,e),h(qe.$$.fragment,e),wo=!1},d(e){e&&(t(L),t(Z),t(Oe),t($t),t(Q),t(Pt),t(E),t(Gt),t(Y),t(St),t(F),t(Nt),t(H),t(zt),t(A),t(Bt),t(q),t(Rt),t(O),t(Lt),t(Dt),t(Qt),t(te),t(Et),t(Yt),t(ne),t(Ft),t(se),t(Ht),t(ae),t(At),t(qt),t(Ke),t(Ot),t(Kt),t(eo),t(pe),t(to),t(tt),t(oo),t(k),t(no),t(it),t(so),t(ao),t(Te),t(io),t(ro),t(xe),t(lo),t(j),t(po),t(co),t(ke),t(mo),t(U),t(fo),t(Pe),t(ho),t(uo),t(_o),t(g),t(go),t(bo),t(b),t(To),t(vo),t(C),t(xo),t(yo),t(It)),t(T),u(D,e),u(K,e),u(ee,e),u(oe,e),u(ie,e),u(re),u(le,e),u(de,e),u(me),u(fe),u(ue),u(_e),u(ge),u(be,e),u(ve,e),u(we),u(Ze),u(je),u(Ue,e),u(Ie),u(Xe),u($e),u(W,e),u(Ge,e),u(Se),u(Ne),u(ze),u(Be),u(Re),u(Le,e),u(De),u(Qe),u(Ee),u(Ye),u(Fe),u(He,e),u(Ae),u(qe,e)}}}const Is='{"title":"Text2Video-Zero","local":"text2video-zero","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"Text-To-Video","local":"text-to-video","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Pose Control","local":"text-to-video-with-pose-control","sections":[{"title":"SDXL Support","local":"sdxl-support","sections":[],"depth":4}],"depth":3},{"title":"Text-To-Video with Edge Control","local":"text-to-video-with-edge-control","sections":[],"depth":3},{"title":"Video Instruct-Pix2Pix","local":"video-instruct-pix2pix","sections":[],"depth":3},{"title":"DreamBooth specialization","local":"dreambooth-specialization","sections":[],"depth":3}],"depth":2},{"title":"TextToVideoZeroPipeline","local":"diffusers.TextToVideoZeroPipeline","sections":[],"depth":2},{"title":"TextToVideoZeroSDXLPipeline","local":"diffusers.TextToVideoZeroSDXLPipeline","sections":[],"depth":2},{"title":"TextToVideoPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput","sections":[],"depth":2}],"depth":1}';function Cs(Wt){return ys(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ns 
extends Ms{constructor(T){super(),Zs(this,T,Cs,Vs,xs,{})}}export{Ns as component}; | |
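The TextToVideoZeroSDXLPipeline.__call__ parameters documented above can be combined into a short usage sketch. The snippet below is an illustration only, not the library's shipped example: the "stabilityai/stable-diffusion-xl-base-1.0" checkpoint, the CUDA device, and the imageio-based export are assumptions made for the sketch.

# A minimal sketch, assuming a CUDA GPU, the public SDXL base checkpoint,
# and that imageio is installed for writing frames to disk.
import torch
import imageio
from diffusers import TextToVideoZeroSDXLPipeline

pipe = TextToVideoZeroSDXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Keyword arguments mirror the __call__ parameters documented above.
result = pipe(
    prompt="A panda surfing a wave, high quality",
    video_length=8,                                    # number of generated frames
    motion_field_strength_x=12,                        # global motion along x (paper, Sect. 3.3.1)
    motion_field_strength_y=12,                        # global motion along y
    t0=44,                                             # forward/backward mixing timesteps (paper, Sect. 3.3.1)
    t1=47,
    generator=torch.Generator("cuda").manual_seed(0),  # fixes the noise for reproducible videos
).images                                               # default output_type="tensor": frames scaled to [0, 1]

# Convert the frames to uint8 and write them out as a short clip.
frames = [(frame * 255).astype("uint8") for frame in result]
imageio.mimsave("panda_surfing.mp4", frames, fps=4)

For longer videos, the frame_ids argument documented above lets you call the pipeline chunk-by-chunk rather than generating every frame in a single call.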