Buckets:
| import{s as Vo,o as jo,n as Jt}from"../chunks/scheduler.8c3d61f6.js";import{S as ko,i as $o,g as a,s,r as f,A as Do,h as r,f as o,c as i,j as S,u,x as d,k as C,y as p,a as n,v as h,d as g,t as _,w as v}from"../chunks/index.da70eac4.js";import{T as Zo}from"../chunks/Tip.1d9b8c37.js";import{D as Me}from"../chunks/Docstring.6b390b9a.js";import{C as Ze}from"../chunks/CodeBlock.00a903b3.js";import{E as Jo}from"../chunks/ExampleCodeBlock.db12be95.js";import{H as B,E as So}from"../chunks/EditOnGithub.1e64e623.js";function Co(j){let l,Z="🧪 This pipeline is for research purposes only.";return{c(){l=a("p"),l.textContent=Z},l(m){l=r(m,"P",{"data-svelte-h":!0}),d(l)!=="svelte-1oxhjjd"&&(l.textContent=Z)},m(m,b){n(m,l,b)},p:Jt,d(m){m&&o(l)}}}function Uo(j){let l,Z='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-components-across-pipelines">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){l=a("p"),l.innerHTML=Z},l(m){l=r(m,"P",{"data-svelte-h":!0}),d(l)!=="svelte-1wmc0l4"&&(l.innerHTML=Z)},m(m,b){n(m,l,b)},p:Jt,d(m){m&&o(l)}}}function Go(j){let l,Z="Examples:",m,b,w;return b=new 
Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwVGV4dFRvVmlkZW9TRFBpcGVsaW5lJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQSUwQXBpcGUlMjAlM0QlMjBUZXh0VG9WaWRlb1NEUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmRhbW8tdmlsYWIlMkZ0ZXh0LXRvLXZpZGVvLW1zLTEuN2IlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB2YXJpYW50JTNEJTIyZnAxNiUyMiUwQSklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyU3BpZGVybWFuJTIwaXMlMjBzdXJmaW5nJTIyJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> TextToVideoSDPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-meta">>>> </span>pipe = TextToVideoSDPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames) | |
| <span class="hljs-meta">>>> </span>video_path`,wrap:!1}}),{c(){l=a("p"),l.textContent=Z,m=s(),f(b.$$.fragment)},l(c){l=r(c,"P",{"data-svelte-h":!0}),d(l)!=="svelte-kvfsh7"&&(l.textContent=Z),m=i(c),u(b.$$.fragment,c)},m(c,J){n(c,l,J),n(c,m,J),h(b,c,J),w=!0},p:Jt,i(c){w||(g(b.$$.fragment,c),w=!0)},o(c){_(b.$$.fragment,c),w=!1},d(c){c&&(o(l),o(m)),v(b,c)}}}function Io(j){let l,Z="Examples:",m,b,w;return b=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfNTc2dyUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlLnNjaGVkdWxlciUyMCUzRCUyMERQTVNvbHZlck11bHRpc3RlcFNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlLnNjaGVkdWxlci5jb25maWcpJTBBcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMnNwaWRlcm1hbiUyMHJ1bm5pbmclMjBpbiUyMHRoZSUyMGRlc2VydCUyMiUwQXZpZGVvX2ZyYW1lcyUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2luZmVyZW5jZV9zdGVwcyUzRDQwJTJDJTIwaGVpZ2h0JTNEMzIwJTJDJTIwd2lkdGglM0Q1NzYlMkMlMjBudW1fZnJhbWVzJTNEMjQpLmZyYW1lcyU1QjAlNUQlMEElMjMlMjBzYWZlJTIwbG93LXJlcyUyMHZpZGVvJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGV4cG9ydF90b192aWRlbyh2aWRlb19mcmFtZXMlMkMlMjBvdXRwdXRfdmlkZW9fcGF0aCUzRCUyMi4lMkZ2aWRlb181NzZfc3BpZGVybWFuLm1wNCUyMiklMEElMEElMjMlMjBsZXQncyUyMG9mZmxvYWQlMjB0aGUlMjB0ZXh0LXRvLWltYWdlJTIwbW9kZWwlMEFwaXBlLnRvKCUyMmNwdSUyMiklMEElMEElMjMlMjBhbmQlMjBsb2FkJTIwdGhlJTIwaW1hZ2UtdG8taW1hZ2UlMjBtb2RlbCUwQXBpcGUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyY2Vyc3BlbnNlJTJGemVyb3Njb3BlX3YyX1hMJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwcmV2aXNpb24lM0QlMjJyZWZzJTJGcHIlMkYxNSUyMiUwQSklMEFwaXBlLnNjaGVkdWxlciUyMCUzRCUyMERQTVNvbHZlck11bHRpc3RlcFNjaGVkdWxlci5mcm9tX2NvbmZpZyhwaXBlLnNjaGVkdWxlci5jb25maWcpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMFRoZSUyMFZBRSUyMGNvbnN1bW
VzJTIwQSUyMExPVCUyMG9mJTIwbWVtb3J5JTJDJTIwbGV0J3MlMjBtYWtlJTIwc3VyZSUyMHdlJTIwcnVuJTIwaXQlMjBpbiUyMHNsaWNlZCUyMG1vZGUlMEFwaXBlLnZhZS5lbmFibGVfc2xpY2luZygpJTBBJTBBJTIzJTIwbm93JTIwbGV0J3MlMjB1cHNjYWxlJTIwaXQlMEF2aWRlbyUyMCUzRCUyMCU1QkltYWdlLmZyb21hcnJheShmcmFtZSkucmVzaXplKCgxMDI0JTJDJTIwNTc2KSklMjBmb3IlMjBmcmFtZSUyMGluJTIwdmlkZW9fZnJhbWVzJTVEJTBBJTBBJTIzJTIwYW5kJTIwZGVub2lzZSUyMGl0JTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjB2aWRlbyUzRHZpZGVvJTJDJTIwc3RyZW5ndGglM0QwLjYpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyUyQyUyMG91dHB1dF92aWRlb19wYXRoJTNEJTIyLiUyRnZpZGVvXzEwMjRfc3BpZGVybWFuLm1wNCUyMiklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-meta">>>> </span>pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_576w"</span>, torch_dtype=torch.float16) | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"spiderman running in the desert"</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt, num_inference_steps=<span class="hljs-number">40</span>, height=<span class="hljs-number">320</span>, width=<span class="hljs-number">576</span>, num_frames=<span class="hljs-number">24</span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># safe low-res video</span> | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames, output_video_path=<span class="hljs-string">"./video_576_spiderman.mp4"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># let's offload the text-to-image model</span> | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cpu"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and load the image-to-image model</span> | |
| <span class="hljs-meta">>>> </span>pipe = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"cerspense/zeroscope_v2_XL"</span>, torch_dtype=torch.float16, revision=<span class="hljs-string">"refs/pr/15"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-meta">>>> </span>pipe.enable_model_cpu_offload() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># The VAE consumes A LOT of memory, let's make sure we run it in sliced mode</span> | |
| <span class="hljs-meta">>>> </span>pipe.vae.enable_slicing() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># now let's upscale it</span> | |
| <span class="hljs-meta">>>> </span>video = [Image.fromarray(frame).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">576</span>)) <span class="hljs-keyword">for</span> frame <span class="hljs-keyword">in</span> video_frames] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and denoise it</span> | |
| <span class="hljs-meta">>>> </span>video_frames = pipe(prompt, video=video, strength=<span class="hljs-number">0.6</span>).frames[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>video_path = export_to_video(video_frames, output_video_path=<span class="hljs-string">"./video_1024_spiderman.mp4"</span>) | |
| <span class="hljs-meta">>>> </span>video_path`,wrap:!1}}),{c(){l=a("p"),l.textContent=Z,m=s(),f(b.$$.fragment)},l(c){l=r(c,"P",{"data-svelte-h":!0}),d(l)!=="svelte-kvfsh7"&&(l.textContent=Z),m=i(c),u(b.$$.fragment,c)},m(c,J){n(c,l,J),n(c,m,J),h(b,c,J),w=!0},p:Jt,i(c){w||(g(b.$$.fragment,c),w=!0)},o(c){_(b.$$.fragment,c),w=!1},d(c){c&&(o(l),o(m)),v(b,c)}}}function Po(j){let l,Z,m,b,w,c,J,Ne,X,Yt='<a href="https://arxiv.org/abs/2308.06571" rel="nofollow">ModelScope Text-to-Video Technical Report</a> is by Jiuniu Wang, Hangjie Yuan, Dayou Chen, Yingya Zhang, Xiang Wang, Shiwei Zhang.',Re,L,Ft="The abstract from the paper is:",Ee,z,Qt='<em>This paper introduces ModelScopeT2V, a text-to-video synthesis model that evolves from a text-to-image synthesis model (i.e., Stable Diffusion). ModelScopeT2V incorporates spatio-temporal blocks to ensure consistent frame generation and smooth movement transitions. The model could adapt to varying frame numbers during training and inference, rendering it suitable for both image-text and video-text datasets. ModelScopeT2V brings together three components (i.e., VQGAN, a text encoder, and a denoising UNet), totally comprising 1.7 billion parameters, in which 0.5 billion parameters are dedicated to temporal capabilities. The model demonstrates superior performance over state-of-the-art methods across three evaluation metrics. The code and an online demo are available at <a href="https://modelscope.cn/models/damo/text-to-video-synthesis/summary" rel="nofollow">https://modelscope.cn/models/damo/text-to-video-synthesis/summary</a>.</em>',He,N,At='You can find additional information about Text-to-Video on the <a href="https://modelscope.cn/models/damo/text-to-video-synthesis/summary" rel="nofollow">project page</a>, <a href="https://github.com/modelscope/modelscope/" rel="nofollow">original codebase</a>, and try it out in a <a href="https://huggingface.co/spaces/damo-vilab/modelscope-text-to-video-synthesis" rel="nofollow">demo</a>. 
Official checkpoints can be found at <a href="https://huggingface.co/damo-vilab" rel="nofollow">damo-vilab</a> and <a href="https://huggingface.co/cerspense" rel="nofollow">cerspense</a>.',Ye,R,Fe,E,Qe,H,qt="Let’s start by generating a short video with the default length of 16 frames (2s at 8 fps):",Ae,Y,qe,F,Ot=`Diffusers supports different optimization techniques to improve the latency | |
| and memory footprint of a pipeline. Since videos are often more memory-heavy than images, | |
| we can enable CPU offloading and VAE slicing to keep the memory footprint at bay.`,Oe,Q,Kt="Let’s generate a video of 8 seconds (64 frames) on the same GPU using CPU offloading and VAE slicing:",Ke,A,et,q,eo="It just takes <strong>7 GBs of GPU memory</strong> to generate the 64 video frames using PyTorch 2.0, “fp16” precision and the techniques mentioned above.",tt,O,to="We can also use a different scheduler easily, using the same method we’d use for Stable Diffusion:",ot,K,nt,ee,oo="Here are some sample outputs:",st,te,no=`<tbody><tr><td><center>An astronaut riding a horse. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astr.gif" alt="An astronaut riding a horse." style="width: 300px;"/></center></td> <td><center>Darth vader surfing in waves. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/vader.gif" alt="Darth vader surfing in waves." style="width: 300px;"/></center></td></tr></tbody>`,it,oe,at,ne,so=`Zeroscope models are watermark-free and have been trained on specific sizes such as <code>576x320</code> and <code>1024x576</code>. | |
| One should first generate a video using the lower resolution checkpoint <a href="https://huggingface.co/cerspense/zeroscope_v2_576w" rel="nofollow"><code>cerspense/zeroscope_v2_576w</code></a> with <a href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.TextToVideoSDPipeline">TextToVideoSDPipeline</a>, | |
| which can then be upscaled using <a href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.VideoToVideoSDPipeline">VideoToVideoSDPipeline</a> and <a href="https://huggingface.co/cerspense/zeroscope_v2_XL" rel="nofollow"><code>cerspense/zeroscope_v2_XL</code></a>.`,rt,se,lt,ie,io="Now the video can be upscaled:",pt,ae,dt,re,ao="Here are some sample outputs:",ct,le,ro=`<tbody><tr><td><center>Darth vader surfing in waves. | |
| <br/> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/darthvader_cerpense.gif" alt="Darth vader surfing in waves." style="width: 576px;"/></center></td></tr></tbody>`,mt,pe,ft,de,lo="Video generation is memory-intensive and one way to reduce your memory usage is to set <code>enable_forward_chunking</code> on the pipeline’s UNet so you don’t run the entire feedforward layer at once. Breaking it up into chunks in a loop is more efficient.",ut,ce,po='Check out the <a href="text-img2vid">Text or image-to-video</a> guide for more details about how certain parameters can affect video generation and how to optimize inference by reducing memory usage.',ht,U,gt,me,_t,T,fe,Vt,Je,co="Pipeline for text-to-video generation.",jt,Ve,mo=`This model inherits from <a href="/docs/diffusers/pr_10312/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,kt,je,fo="The pipeline also inherits the following loading methods:",$t,ke,uo='<li><a href="/docs/diffusers/pr_10312/en/api/loaders/textual_inversion#diffusers.loaders.TextualInversionLoaderMixin.load_textual_inversion">load_textual_inversion()</a> for loading textual inversion embeddings</li> <li><a href="/docs/diffusers/pr_10312/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> for loading LoRA weights</li> <li><a href="/docs/diffusers/pr_10312/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.save_lora_weights">save_lora_weights()</a> for saving LoRA weights</li>',Dt,k,ue,St,$e,ho="The call function to the pipeline for generation.",Ct,G,Ut,I,he,Gt,De,go="Encodes the prompt into text encoder hidden states.",vt,ge,bt,y,_e,It,Se,_o="Pipeline for text-guided video-to-video generation.",Pt,Ce,vo=`This model inherits from <a href="/docs/diffusers/pr_10312/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods | |
| implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Wt,Ue,bo="The pipeline also inherits the following loading methods:",Bt,Ge,To='<li><a href="/docs/diffusers/pr_10312/en/api/loaders/textual_inversion#diffusers.loaders.TextualInversionLoaderMixin.load_textual_inversion">load_textual_inversion()</a> for loading textual inversion embeddings</li> <li><a href="/docs/diffusers/pr_10312/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights">load_lora_weights()</a> for loading LoRA weights</li> <li><a href="/docs/diffusers/pr_10312/en/api/loaders/lora#diffusers.loaders.StableDiffusionLoraLoaderMixin.save_lora_weights">save_lora_weights()</a> for saving LoRA weights</li>',Xt,$,ve,Lt,Ie,yo="The call function to the pipeline for generation.",zt,P,Nt,W,be,Rt,Pe,wo="Encodes the prompt into text encoder hidden states.",Tt,Te,yt,V,ye,Et,We,xo="Output class for text-to-video pipelines.",Ht,Be,Mo=`PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape | |
| <code>(batch_size, num_frames, channels, height, width)</code>`,wt,we,xt,ze,Mt;return w=new Zo({props:{warning:!0,$$slots:{default:[Co]},$$scope:{ctx:j}}}),J=new B({props:{title:"Text-to-video",local:"text-to-video",headingTag:"h1"}}),R=new B({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),E=new B({props:{title:"text-to-video-ms-1.7b",local:"text-to-video-ms-17b",headingTag:"h3"}}),Y=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZSUyMCUzRCUyMHBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJTcGlkZXJtYW4lMjBpcyUyMHN1cmZpbmclMjIlMEF2aWRlb19mcmFtZXMlMjAlM0QlMjBwaXBlKHByb21wdCkuZnJhbWVzJTVCMCU1RCUwQXZpZGVvX3BhdGglMjAlM0QlMjBleHBvcnRfdG9fdmlkZW8odmlkZW9fZnJhbWVzKSUwQXZpZGVvX3BhdGg=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| video_frames = pipe(prompt).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),A=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMG1lbW9yeSUyMG9wdGltaXphdGlvbiUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJEYXJ0aCUyMFZhZGVyJTIwc3VyZmluZyUyMGElMjB3YXZlJTIyJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjBudW1fZnJhbWVzJTNENjQpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.enable_vae_slicing() | |
| prompt = <span class="hljs-string">"Darth Vader surfing a wave"</span> | |
| video_frames = pipe(prompt, num_frames=<span class="hljs-number">64</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),K=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJkYW1vLXZpbGFiJTJGdGV4dC10by12aWRlby1tcy0xLjdiJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdmFyaWFudCUzRCUyMmZwMTYlMjIpJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJTcGlkZXJtYW4lMjBpcyUyMHN1cmZpbmclMjIlMEF2aWRlb19mcmFtZXMlMjAlM0QlMjBwaXBlKHByb21wdCUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0QyNSkuZnJhbWVzJTVCMCU1RCUwQXZpZGVvX3BhdGglMjAlM0QlMjBleHBvcnRfdG9fdmlkZW8odmlkZW9fZnJhbWVzKSUwQXZpZGVvX3BhdGg=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"damo-vilab/text-to-video-ms-1.7b"</span>, torch_dtype=torch.float16, variant=<span class="hljs-string">"fp16"</span>) | |
| pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| pipe.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"Spiderman is surfing"</span> | |
| video_frames = pipe(prompt, num_inference_steps=<span class="hljs-number">25</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),oe=new B({props:{title:"cerspense/zeroscope_v2_576w & cerspense/zeroscope_v2_XL",local:"cerspensezeroscopev2576w--cerspensezeroscopev2xl",headingTag:"h3"}}),se=new Ze({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMkMlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTBBZnJvbSUyMFBJTCUyMGltcG9ydCUyMEltYWdlJTBBJTBBcGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfNTc2dyUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBJTBBJTIzJTIwbWVtb3J5JTIwb3B0aW1pemF0aW9uJTBBcGlwZS51bmV0LmVuYWJsZV9mb3J3YXJkX2NodW5raW5nKGNodW5rX3NpemUlM0QxJTJDJTIwZGltJTNEMSklMEFwaXBlLmVuYWJsZV92YWVfc2xpY2luZygpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyRGFydGglMjBWYWRlciUyMHN1cmZpbmclMjBhJTIwd2F2ZSUyMiUwQXZpZGVvX2ZyYW1lcyUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2ZyYW1lcyUzRDI0KS5mcmFtZXMlNUIwJTVEJTBBdmlkZW9fcGF0aCUyMCUzRCUyMGV4cG9ydF90b192aWRlbyh2aWRlb19mcmFtZXMpJTBBdmlkZW9fcGF0aA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_576w"</span>, torch_dtype=torch.float16) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.unet.enable_forward_chunking(chunk_size=<span class="hljs-number">1</span>, dim=<span class="hljs-number">1</span>) | |
| pipe.enable_vae_slicing() | |
| prompt = <span class="hljs-string">"Darth Vader surfing a wave"</span> | |
| video_frames = pipe(prompt, num_frames=<span class="hljs-number">24</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),ae=new Ze({props:{code:"cGlwZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMjJjZXJzcGVuc2UlMkZ6ZXJvc2NvcGVfdjJfWEwlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZS5zY2hlZHVsZXIlMjAlM0QlMjBEUE1Tb2x2ZXJNdWx0aXN0ZXBTY2hlZHVsZXIuZnJvbV9jb25maWcocGlwZS5zY2hlZHVsZXIuY29uZmlnKSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEElMjMlMjBtZW1vcnklMjBvcHRpbWl6YXRpb24lMEFwaXBlLnVuZXQuZW5hYmxlX2ZvcndhcmRfY2h1bmtpbmcoY2h1bmtfc2l6ZSUzRDElMkMlMjBkaW0lM0QxKSUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEElMEF2aWRlbyUyMCUzRCUyMCU1QkltYWdlLmZyb21hcnJheShmcmFtZSkucmVzaXplKCgxMDI0JTJDJTIwNTc2KSklMjBmb3IlMjBmcmFtZSUyMGluJTIwdmlkZW9fZnJhbWVzJTVEJTBBJTBBdmlkZW9fZnJhbWVzJTIwJTNEJTIwcGlwZShwcm9tcHQlMkMlMjB2aWRlbyUzRHZpZGVvJTJDJTIwc3RyZW5ndGglM0QwLjYpLmZyYW1lcyU1QjAlNUQlMEF2aWRlb19wYXRoJTIwJTNEJTIwZXhwb3J0X3RvX3ZpZGVvKHZpZGVvX2ZyYW1lcyklMEF2aWRlb19wYXRo",highlighted:`pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"cerspense/zeroscope_v2_XL"</span>, torch_dtype=torch.float16) | |
| pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) | |
| pipe.enable_model_cpu_offload() | |
| <span class="hljs-comment"># memory optimization</span> | |
| pipe.unet.enable_forward_chunking(chunk_size=<span class="hljs-number">1</span>, dim=<span class="hljs-number">1</span>) | |
| pipe.enable_vae_slicing() | |
| video = [Image.fromarray(frame).resize((<span class="hljs-number">1024</span>, <span class="hljs-number">576</span>)) <span class="hljs-keyword">for</span> frame <span class="hljs-keyword">in</span> video_frames] | |
| video_frames = pipe(prompt, video=video, strength=<span class="hljs-number">0.6</span>).frames[<span class="hljs-number">0</span>] | |
| video_path = export_to_video(video_frames) | |
| video_path`,wrap:!1}}),pe=new B({props:{title:"Tips",local:"tips",headingTag:"h2"}}),U=new Zo({props:{$$slots:{default:[Uo]},$$scope:{ctx:j}}}),me=new B({props:{title:"TextToVideoSDPipeline",local:"diffusers.TextToVideoSDPipeline",headingTag:"h2"}}),fe=new Me({props:{name:"class diffusers.TextToVideoSDPipeline",anchor:"diffusers.TextToVideoSDPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet3DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10312/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.`,name:"vae"},{anchor:"diffusers.TextToVideoSDPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.TextToVideoSDPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.TextToVideoSDPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_10312/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_10312/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.TextToVideoSDPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10312/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_10312/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_10312/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L61"}}),ue=new Me({props:{name:"__call__",anchor:"diffusers.TextToVideoSDPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"height",val:": typing.Optional[int] = None"},{name:"width",val:": typing.Optional[int] = None"},{name:"num_frames",val:": int = 16"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 9.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The height in pixels of the generated video.`,name:"height"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, <em>optional</em>, defaults to <code>self.unet.config.sample_size * self.vae_scale_factor</code>) — | |
| The width in pixels of the generated video.`,name:"width"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, <em>optional</em>, defaults to 16) — | |
| The number of video frames that are generated. Defaults to 16 frames, which at 8 frames per second | |
| amounts to 2 seconds of video.`,name:"num_frames"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to higher quality videos at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 9.0) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in image generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The number of images to generate per prompt.`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>. Latents should be of shape | |
| <code>(batch_size, num_channel, num_frames, height, width)</code>.`,name:"latents"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>torch.Tensor</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput">TextToVideoSDPipelineOutput</a> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that is called every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.TextToVideoSDPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L435",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> is | |
| returned, otherwise a <code>tuple</code> is returned where the first element is a list with the generated frames.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),G=new Jo({props:{anchor:"diffusers.TextToVideoSDPipeline.__call__.example",$$slots:{default:[Go]},$$scope:{ctx:j}}}),he=new Me({props:{name:"encode_prompt",anchor:"diffusers.TextToVideoSDPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.TextToVideoSDPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L145"}}),ge=new B({props:{title:"VideoToVideoSDPipeline",local:"diffusers.VideoToVideoSDPipeline",headingTag:"h2"}}),_e=new Me({props:{name:"class diffusers.VideoToVideoSDPipeline",anchor:"diffusers.VideoToVideoSDPipeline",parameters:[{name:"vae",val:": AutoencoderKL"},{name:"text_encoder",val:": CLIPTextModel"},{name:"tokenizer",val:": CLIPTokenizer"},{name:"unet",val:": UNet3DConditionModel"},{name:"scheduler",val:": KarrasDiffusionSchedulers"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_10312/en/api/models/autoencoderkl#diffusers.AutoencoderKL">AutoencoderKL</a>) — | |
| Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"},{anchor:"diffusers.VideoToVideoSDPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>CLIPTextModel</code>) — | |
| Frozen text-encoder (<a href="https://huggingface.co/openai/clip-vit-large-patch14" rel="nofollow">clip-vit-large-patch14</a>).`,name:"text_encoder"},{anchor:"diffusers.VideoToVideoSDPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>CLIPTokenizer</code>) — | |
| A <a href="https://huggingface.co/docs/transformers/main/en/model_doc/clip#transformers.CLIPTokenizer" rel="nofollow">CLIPTokenizer</a> to tokenize text.`,name:"tokenizer"},{anchor:"diffusers.VideoToVideoSDPipeline.unet",description:`<strong>unet</strong> (<a href="/docs/diffusers/pr_10312/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a>) — | |
| A <a href="/docs/diffusers/pr_10312/en/api/models/unet3d-cond#diffusers.UNet3DConditionModel">UNet3DConditionModel</a> to denoise the encoded video latents.`,name:"unet"},{anchor:"diffusers.VideoToVideoSDPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_10312/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) — | |
| A scheduler to be used in combination with <code>unet</code> to denoise the encoded image latents. Can be one of | |
| <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, <a href="/docs/diffusers/pr_10312/en/api/schedulers/lms_discrete#diffusers.LMSDiscreteScheduler">LMSDiscreteScheduler</a>, or <a href="/docs/diffusers/pr_10312/en/api/schedulers/pndm#diffusers.PNDMScheduler">PNDMScheduler</a>.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L96"}}),ve=new Me({props:{name:"__call__",anchor:"diffusers.VideoToVideoSDPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"video",val:": typing.Union[typing.List[numpy.ndarray], torch.Tensor] = None"},{name:"strength",val:": float = 0.6"},{name:"num_inference_steps",val:": int = 50"},{name:"guidance_scale",val:": float = 15.0"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"eta",val:": float = 0.0"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'np'"},{name:"return_dict",val:": bool = True"},{name:"callback",val:": typing.Optional[typing.Callable[[int, int, torch.Tensor], NoneType]] = None"},{name:"callback_steps",val:": int = 1"},{name:"cross_attention_kwargs",val:": typing.Optional[typing.Dict[str, typing.Any]] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide image generation. If not defined, you need to pass <code>prompt_embeds</code>.`,name:"prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.video",description:`<strong>video</strong> (<code>List[np.ndarray]</code> or <code>torch.Tensor</code>) — | |
| <code>video</code> frames or tensor representing a video batch to be used as the starting point for the process. | |
| Can also accept video latents as <code>video</code>; if latents are passed directly, they are not encoded again.`,name:"video"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.strength",description:`<strong>strength</strong> (<code>float</code>, <em>optional</em>, defaults to 0.6) — | |
| Indicates extent to transform the reference <code>video</code>. Must be between 0 and 1. <code>video</code> is used as a | |
| starting point, adding more noise to it the larger the <code>strength</code>. The number of denoising steps | |
| depends on the amount of noise initially added. When <code>strength</code> is 1, added noise is maximum and the | |
| denoising process runs for the full number of iterations specified in <code>num_inference_steps</code>. A value of | |
| 1 essentially ignores <code>video</code>.`,name:"strength"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 50) — | |
| The number of denoising steps. More denoising steps usually lead to higher quality videos at the | |
| expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, <em>optional</em>, defaults to 15.0) — | |
| A higher guidance scale value encourages the model to generate images closely linked to the text | |
| <code>prompt</code> at the expense of lower image quality. Guidance scale is enabled when <code>guidance_scale > 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts to guide what to not include in video generation. If not defined, you need to | |
| pass <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (<code>guidance_scale < 1</code>).`,name:"negative_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.eta",description:`<strong>eta</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| Corresponds to parameter eta (η) from the <a href="https://arxiv.org/abs/2010.02502" rel="nofollow">DDIM</a> paper. Only applies | |
| to the <a href="/docs/diffusers/pr_10312/en/api/schedulers/ddim#diffusers.DDIMScheduler">DDIMScheduler</a>, and is ignored in other schedulers.`,name:"eta"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) — | |
| A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make | |
| generation deterministic.`,name:"generator"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video | |
| generation. Can be used to tweak the same generation with different prompts. If not provided, a latents | |
| tensor is generated by sampling using the supplied random <code>generator</code>. Latents should be of shape | |
| <code>(batch_size, num_channel, num_frames, height, width)</code>.`,name:"latents"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not | |
| provided, text embeddings are generated from the <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If | |
| not provided, <code>negative_prompt_embeds</code> are generated from the <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"np"</code>) — | |
| The output format of the generated video. Choose between <code>torch.Tensor</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to return a <a href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput">TextToVideoSDPipelineOutput</a> instead | |
| of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.callback",description:`<strong>callback</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that is called every <code>callback_steps</code> steps during inference. The function is called with the | |
| following arguments: <code>callback(step: int, timestep: int, latents: torch.Tensor)</code>.`,name:"callback"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.callback_steps",description:`<strong>callback_steps</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The frequency at which the <code>callback</code> function is called. If not specified, the callback is called at | |
| every step.`,name:"callback_steps"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.cross_attention_kwargs",description:`<strong>cross_attention_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| A kwargs dictionary that if specified is passed along to the <code>AttentionProcessor</code> as defined in | |
| <a href="https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py" rel="nofollow"><code>self.processor</code></a>.`,name:"cross_attention_kwargs"},{anchor:"diffusers.VideoToVideoSDPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L500",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>If <code>return_dict</code> is <code>True</code>, <a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> is | |
| returned, otherwise a <code>tuple</code> is returned where the first element is a list with the generated frames.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/diffusers/pr_10312/en/api/pipelines/text_to_video#diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput" | |
| >TextToVideoSDPipelineOutput</a> or <code>tuple</code></p> | |
| `}}),P=new Jo({props:{anchor:"diffusers.VideoToVideoSDPipeline.__call__.example",$$slots:{default:[Io]},$$scope:{ctx:j}}}),be=new Me({props:{name:"encode_prompt",anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt",parameters:[{name:"prompt",val:""},{name:"device",val:""},{name:"num_images_per_prompt",val:""},{name:"do_classifier_free_guidance",val:""},{name:"negative_prompt",val:" = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"lora_scale",val:": typing.Optional[float] = None"},{name:"clip_skip",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| prompt to be encoded`,name:"prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.device",description:`<strong>device</strong> — (<code>torch.device</code>): | |
| torch device`,name:"device"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.num_images_per_prompt",description:`<strong>num_images_per_prompt</strong> (<code>int</code>) — | |
| number of images that should be generated per prompt`,name:"num_images_per_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>) — | |
| whether to use classifier free guidance or not`,name:"do_classifier_free_guidance"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) — | |
| The prompt or prompts not to guide the image generation. If not defined, one has to pass | |
| <code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is | |
| less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not | |
| provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) — | |
| Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt | |
| weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input | |
| argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.lora_scale",description:`<strong>lora_scale</strong> (<code>float</code>, <em>optional</em>) — | |
| A LoRA scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.`,name:"lora_scale"},{anchor:"diffusers.VideoToVideoSDPipeline.encode_prompt.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that | |
| the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py#L180"}}),Te=new B({props:{title:"TextToVideoSDPipelineOutput",local:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",headingTag:"h2"}}),ye=new Me({props:{name:"class diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",anchor:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput",parameters:[{name:"frames",val:": typing.Union[torch.Tensor, numpy.ndarray, typing.List[typing.List[PIL.Image.Image]]]"}],parametersDescription:[{anchor:"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) — | |
| List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing | |
| denoised`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_10312/src/diffusers/pipelines/text_to_video_synthesis/pipeline_output.py#L13"}}),we=new So({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/text_to_video.md"}}),{c(){l=a("meta"),Z=s(),m=a("p"),b=s(),f(w.$$.fragment),c=s(),f(J.$$.fragment),Ne=s(),X=a("p"),X.innerHTML=Yt,Re=s(),L=a("p"),L.textContent=Ft,Ee=s(),z=a("p"),z.innerHTML=Qt,He=s(),N=a("p"),N.innerHTML=At,Ye=s(),f(R.$$.fragment),Fe=s(),f(E.$$.fragment),Qe=s(),H=a("p"),H.textContent=qt,Ae=s(),f(Y.$$.fragment),qe=s(),F=a("p"),F.textContent=Ot,Oe=s(),Q=a("p"),Q.textContent=Kt,Ke=s(),f(A.$$.fragment),et=s(),q=a("p"),q.innerHTML=eo,tt=s(),O=a("p"),O.textContent=to,ot=s(),f(K.$$.fragment),nt=s(),ee=a("p"),ee.textContent=oo,st=s(),te=a("table"),te.innerHTML=no,it=s(),f(oe.$$.fragment),at=s(),ne=a("p"),ne.innerHTML=so,rt=s(),f(se.$$.fragment),lt=s(),ie=a("p"),ie.textContent=io,pt=s(),f(ae.$$.fragment),dt=s(),re=a("p"),re.textContent=ao,ct=s(),le=a("table"),le.innerHTML=ro,mt=s(),f(pe.$$.fragment),ft=s(),de=a("p"),de.innerHTML=lo,ut=s(),ce=a("p"),ce.innerHTML=po,ht=s(),f(U.$$.fragment),gt=s(),f(me.$$.fragment),_t=s(),T=a("div"),f(fe.$$.fragment),Vt=s(),Je=a("p"),Je.textContent=co,jt=s(),Ve=a("p"),Ve.innerHTML=mo,kt=s(),je=a("p"),je.textContent=fo,$t=s(),ke=a("ul"),ke.innerHTML=uo,Dt=s(),k=a("div"),f(ue.$$.fragment),St=s(),$e=a("p"),$e.textContent=ho,Ct=s(),f(G.$$.fragment),Ut=s(),I=a("div"),f(he.$$.fragment),Gt=s(),De=a("p"),De.textContent=go,vt=s(),f(ge.$$.fragment),bt=s(),y=a("div"),f(_e.$$.fragment),It=s(),Se=a("p"),Se.textContent=_o,Pt=s(),Ce=a("p"),Ce.innerHTML=vo,Wt=s(),Ue=a("p"),Ue.textContent=bo,Bt=s(),Ge=a("ul"),Ge.innerHTML=To,Xt=s(),$=a("div"),f(ve.$$.fragment),Lt=s(),Ie=a("p"),Ie.textContent=yo,zt=s(),f(P.$$.fragment),Nt=s(),W=a("div"),f(be.$$.fragment),Rt=s(),Pe=a("p"),Pe.textContent=wo,Tt=s(),f(Te.$$.fragment),yt=s(),V=a("div"),f(ye.$$.fragment),Et=s(),We=a("p"),We.textConte
nt=xo,Ht=s(),Be=a("p"),Be.innerHTML=Mo,wt=s(),f(we.$$.fragment),xt=s(),ze=a("p"),this.h()},l(e){const t=Do("svelte-u9bgzb",document.head);l=r(t,"META",{name:!0,content:!0}),t.forEach(o),Z=i(e),m=r(e,"P",{}),S(m).forEach(o),b=i(e),u(w.$$.fragment,e),c=i(e),u(J.$$.fragment,e),Ne=i(e),X=r(e,"P",{"data-svelte-h":!0}),d(X)!=="svelte-17trsey"&&(X.innerHTML=Yt),Re=i(e),L=r(e,"P",{"data-svelte-h":!0}),d(L)!=="svelte-1cwsb16"&&(L.textContent=Ft),Ee=i(e),z=r(e,"P",{"data-svelte-h":!0}),d(z)!=="svelte-1ovx0um"&&(z.innerHTML=Qt),He=i(e),N=r(e,"P",{"data-svelte-h":!0}),d(N)!=="svelte-9292p9"&&(N.innerHTML=At),Ye=i(e),u(R.$$.fragment,e),Fe=i(e),u(E.$$.fragment,e),Qe=i(e),H=r(e,"P",{"data-svelte-h":!0}),d(H)!=="svelte-a4ktac"&&(H.textContent=qt),Ae=i(e),u(Y.$$.fragment,e),qe=i(e),F=r(e,"P",{"data-svelte-h":!0}),d(F)!=="svelte-1gwbshq"&&(F.textContent=Ot),Oe=i(e),Q=r(e,"P",{"data-svelte-h":!0}),d(Q)!=="svelte-yoc5w4"&&(Q.textContent=Kt),Ke=i(e),u(A.$$.fragment,e),et=i(e),q=r(e,"P",{"data-svelte-h":!0}),d(q)!=="svelte-m9rmsr"&&(q.innerHTML=eo),tt=i(e),O=r(e,"P",{"data-svelte-h":!0}),d(O)!=="svelte-1yxs7ur"&&(O.textContent=to),ot=i(e),u(K.$$.fragment,e),nt=i(e),ee=r(e,"P",{"data-svelte-h":!0}),d(ee)!=="svelte-1pxzje"&&(ee.textContent=oo),st=i(e),te=r(e,"TABLE",{"data-svelte-h":!0}),d(te)!=="svelte-5s26g4"&&(te.innerHTML=no),it=i(e),u(oe.$$.fragment,e),at=i(e),ne=r(e,"P",{"data-svelte-h":!0}),d(ne)!=="svelte-klrnx4"&&(ne.innerHTML=so),rt=i(e),u(se.$$.fragment,e),lt=i(e),ie=r(e,"P",{"data-svelte-h":!0}),d(ie)!=="svelte-4ywac8"&&(ie.textContent=io),pt=i(e),u(ae.$$.fragment,e),dt=i(e),re=r(e,"P",{"data-svelte-h":!0}),d(re)!=="svelte-1pxzje"&&(re.textContent=ao),ct=i(e),le=r(e,"TABLE",{"data-svelte-h":!0}),d(le)!=="svelte-9iw63g"&&(le.innerHTML=ro),mt=i(e),u(pe.$$.fragment,e),ft=i(e),de=r(e,"P",{"data-svelte-h":!0}),d(de)!=="svelte-op8wgu"&&(de.innerHTML=lo),ut=i(e),ce=r(e,"P",{"data-svelte-h":!0}),d(ce)!=="svelte-1lb9rti"&&(ce.innerHTML=po),ht=i(e),u(U.$$.fragment,e),gt=i(e),u(me.$$.frag
ment,e),_t=i(e),T=r(e,"DIV",{class:!0});var x=S(T);u(fe.$$.fragment,x),Vt=i(x),Je=r(x,"P",{"data-svelte-h":!0}),d(Je)!=="svelte-19ipoo4"&&(Je.textContent=co),jt=i(x),Ve=r(x,"P",{"data-svelte-h":!0}),d(Ve)!=="svelte-1qc9xyr"&&(Ve.innerHTML=mo),kt=i(x),je=r(x,"P",{"data-svelte-h":!0}),d(je)!=="svelte-14s6m4u"&&(je.textContent=fo),$t=i(x),ke=r(x,"UL",{"data-svelte-h":!0}),d(ke)!=="svelte-1n1gzih"&&(ke.innerHTML=uo),Dt=i(x),k=r(x,"DIV",{class:!0});var D=S(k);u(ue.$$.fragment,D),St=i(D),$e=r(D,"P",{"data-svelte-h":!0}),d($e)!=="svelte-50j04k"&&($e.textContent=ho),Ct=i(D),u(G.$$.fragment,D),D.forEach(o),Ut=i(x),I=r(x,"DIV",{class:!0});var xe=S(I);u(he.$$.fragment,xe),Gt=i(xe),De=r(xe,"P",{"data-svelte-h":!0}),d(De)!=="svelte-16q0ax1"&&(De.textContent=go),xe.forEach(o),x.forEach(o),vt=i(e),u(ge.$$.fragment,e),bt=i(e),y=r(e,"DIV",{class:!0});var M=S(y);u(_e.$$.fragment,M),It=i(M),Se=r(M,"P",{"data-svelte-h":!0}),d(Se)!=="svelte-1oyadpc"&&(Se.textContent=_o),Pt=i(M),Ce=r(M,"P",{"data-svelte-h":!0}),d(Ce)!=="svelte-1qc9xyr"&&(Ce.innerHTML=vo),Wt=i(M),Ue=r(M,"P",{"data-svelte-h":!0}),d(Ue)!=="svelte-14s6m4u"&&(Ue.textContent=bo),Bt=i(M),Ge=r(M,"UL",{"data-svelte-h":!0}),d(Ge)!=="svelte-1n1gzih"&&(Ge.innerHTML=To),Xt=i(M),$=r(M,"DIV",{class:!0});var Xe=S($);u(ve.$$.fragment,Xe),Lt=i(Xe),Ie=r(Xe,"P",{"data-svelte-h":!0}),d(Ie)!=="svelte-50j04k"&&(Ie.textContent=yo),zt=i(Xe),u(P.$$.fragment,Xe),Xe.forEach(o),Nt=i(M),W=r(M,"DIV",{class:!0});var Zt=S(W);u(be.$$.fragment,Zt),Rt=i(Zt),Pe=r(Zt,"P",{"data-svelte-h":!0}),d(Pe)!=="svelte-16q0ax1"&&(Pe.textContent=wo),Zt.forEach(o),M.forEach(o),Tt=i(e),u(Te.$$.fragment,e),yt=i(e),V=r(e,"DIV",{class:!0});var 
Le=S(V);u(ye.$$.fragment,Le),Et=i(Le),We=r(Le,"P",{"data-svelte-h":!0}),d(We)!=="svelte-1uuckb2"&&(We.textContent=xo),Ht=i(Le),Be=r(Le,"P",{"data-svelte-h":!0}),d(Be)!=="svelte-gk6g69"&&(Be.innerHTML=Mo),Le.forEach(o),wt=i(e),u(we.$$.fragment,e),xt=i(e),ze=r(e,"P",{}),S(ze).forEach(o),this.h()},h(){C(l,"name","hf:doc:metadata"),C(l,"content",Wo),C(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,t){p(document.head,l),n(e,Z,t),n(e,m,t),n(e,b,t),h(w,e,t),n(e,c,t),h(J,e,t),n(e,Ne,t),n(e,X,t),n(e,Re,t),n(e,L,t),n(e,Ee,t),n(e,z,t),n(e,He,t),n(e,N,t),n(e,Ye,t),h(R,e,t),n(e,Fe,t),h(E,e,t),n(e,Qe,t),n(e,H,t),n(e,Ae,t),h(Y,e,t),n(e,qe,t),n(e,F,t),n(e,Oe,t),n(e,Q,t),n(e,Ke,t),h(A,e,t),n(e,et,t),n(e,q,t),n(e,tt,t),n(e,O,t),n(e,ot,t),h(K,e,t),n(e,nt,t),n(e,ee,t),n(e,st,t),n(e,te,t),n(e,it,t),h(oe,e,t),n(e,at,t),n(e,ne,t),n(e,rt,t),h(se,e,t),n(e,lt,t),n(e,ie,t),n(e,pt,t),h(ae,e,t),n(e,dt,t),n(e,re,t),n(e,ct,t),n(e,le,t),n(e,mt,t),h(pe,e,t),n(e,ft,t),n(e,de,t),n(e,ut,t),n(e,ce,t),n(e,ht,t),h(U,e,t),n(e,gt,t),h(me,e,t),n(e,_t,t),n(e,T,t),h(fe,T,null),p(T,Vt),p(T,Je),p(T,jt),p(T,Ve),p(T,kt),p(T,je),p(T,$t),p(T,ke),p(T,Dt),p(T,k),h(ue,k,null),p(k,St),p(k,$e),p(k,Ct),h(G,k,null),p(T,Ut),p(T,I),h(he,I,null),p(I,Gt),p(I,De),n(e,vt,t),h(ge,e,t),n(e,bt,t),n(e,y,t),h(_e,y,null),p(y,It),p(y,Se),p(y,Pt),p(y,Ce),p(y,Wt),p(y,Ue),p(y,Bt),p(y,Ge),p(y,Xt),p(y,$),h(ve,$,null),p($,Lt),p($,Ie),p($,zt),h(P,$,null),p(y,Nt),p(y,W),h(be,W,null),p(W,Rt),p(W,Pe),n(e,Tt,t),h(Te,e,t),n(e,yt,t),n(e,V,t),h(ye,V,null),p(V,Et),p(V,We),p(V,Ht),p(V,Be),n(e,wt,t),h(we,e,t),n(e,xt,t),n(e,ze,t),Mt=!0},p(e,[t]){const x={};t&2&&(x.$$scope={dirty:t,ctx:e}),w.$set(x);const D={};t&2&&(D.$$scope={dirty:t,ctx:e}),U.$set(D);const xe={};t&2&&(xe.$$scope={dirty:t,ctx:e}),G.$set(xe);const 
M={};t&2&&(M.$$scope={dirty:t,ctx:e}),P.$set(M)},i(e){Mt||(g(w.$$.fragment,e),g(J.$$.fragment,e),g(R.$$.fragment,e),g(E.$$.fragment,e),g(Y.$$.fragment,e),g(A.$$.fragment,e),g(K.$$.fragment,e),g(oe.$$.fragment,e),g(se.$$.fragment,e),g(ae.$$.fragment,e),g(pe.$$.fragment,e),g(U.$$.fragment,e),g(me.$$.fragment,e),g(fe.$$.fragment,e),g(ue.$$.fragment,e),g(G.$$.fragment,e),g(he.$$.fragment,e),g(ge.$$.fragment,e),g(_e.$$.fragment,e),g(ve.$$.fragment,e),g(P.$$.fragment,e),g(be.$$.fragment,e),g(Te.$$.fragment,e),g(ye.$$.fragment,e),g(we.$$.fragment,e),Mt=!0)},o(e){_(w.$$.fragment,e),_(J.$$.fragment,e),_(R.$$.fragment,e),_(E.$$.fragment,e),_(Y.$$.fragment,e),_(A.$$.fragment,e),_(K.$$.fragment,e),_(oe.$$.fragment,e),_(se.$$.fragment,e),_(ae.$$.fragment,e),_(pe.$$.fragment,e),_(U.$$.fragment,e),_(me.$$.fragment,e),_(fe.$$.fragment,e),_(ue.$$.fragment,e),_(G.$$.fragment,e),_(he.$$.fragment,e),_(ge.$$.fragment,e),_(_e.$$.fragment,e),_(ve.$$.fragment,e),_(P.$$.fragment,e),_(be.$$.fragment,e),_(Te.$$.fragment,e),_(ye.$$.fragment,e),_(we.$$.fragment,e),Mt=!1},d(e){e&&(o(Z),o(m),o(b),o(c),o(Ne),o(X),o(Re),o(L),o(Ee),o(z),o(He),o(N),o(Ye),o(Fe),o(Qe),o(H),o(Ae),o(qe),o(F),o(Oe),o(Q),o(Ke),o(et),o(q),o(tt),o(O),o(ot),o(nt),o(ee),o(st),o(te),o(it),o(at),o(ne),o(rt),o(lt),o(ie),o(pt),o(dt),o(re),o(ct),o(le),o(mt),o(ft),o(de),o(ut),o(ce),o(ht),o(gt),o(_t),o(T),o(vt),o(bt),o(y),o(Tt),o(yt),o(V),o(wt),o(xt),o(ze)),o(l),v(w,e),v(J,e),v(R,e),v(E,e),v(Y,e),v(A,e),v(K,e),v(oe,e),v(se,e),v(ae,e),v(pe,e),v(U,e),v(me,e),v(fe),v(ue),v(G),v(he),v(ge,e),v(_e),v(ve),v(P),v(be),v(Te,e),v(ye),v(we,e)}}}const Wo='{"title":"Text-to-video","local":"text-to-video","sections":[{"title":"Usage example","local":"usage-example","sections":[{"title":"text-to-video-ms-1.7b","local":"text-to-video-ms-17b","sections":[],"depth":3},{"title":"cerspense/zeroscope_v2_576w & 
cerspense/zeroscope_v2_XL","local":"cerspensezeroscopev2576w--cerspensezeroscopev2xl","sections":[],"depth":3}],"depth":2},{"title":"Tips","local":"tips","sections":[],"depth":2},{"title":"TextToVideoSDPipeline","local":"diffusers.TextToVideoSDPipeline","sections":[],"depth":2},{"title":"VideoToVideoSDPipeline","local":"diffusers.VideoToVideoSDPipeline","sections":[],"depth":2},{"title":"TextToVideoSDPipelineOutput","local":"diffusers.pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput","sections":[],"depth":2}],"depth":1}';
/*
 * The text above closes the `Wo` string literal: a JSON-encoded table of
 * contents (title / local anchor / nested sections / depth). The fragment's
 * h() method passes it to C(l,"content",Wo) on the element it names
 * "hf:doc:metadata".
 */

/**
 * Instance function handed to `$o` by the `Yo` constructor below.
 *
 * Calls `jo` (imported as `o` from ../chunks/scheduler; presumably Svelte's
 * onMount -- confirm) with a callback that builds a URLSearchParams from
 * window.location.search and reads the "fw" key. The value read is discarded.
 *
 * @param j Unused by this function.
 * @returns An empty array (the comma expression evaluates to `[]`).
 */
function Bo(j){return jo(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component class. Extends `ko` (imported as `S` from ../chunks/index).
 * The constructor calls super() and then `$o` (imported as `i` from
 * ../chunks/index) with this instance, the constructor argument `l`, the
 * instance function `Bo`, the fragment factory `Po`, `Vo` (imported as `s`
 * from ../chunks/scheduler) and an empty object literal.
 */
class Yo extends ko{constructor(l){super(),$o(this,l,Bo,Po,Vo,{})}}
/* The class is exported under the name `component`. */
export{Yo as component}; | |
Xet Storage Details
- Size:
- 68.4 kB
- Xet hash:
- 1e7b7ba1376ca034dbd7662ad6dabcd8c86a49344e0db98f33a99438d68eb648
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.