| import gradio as gr |
| import imageio |
| import torch |
| from diffusers import TextToVideoZeroPipeline |
|
|
| from video_diffusion.tuneavideo.util import save_videos_grid |
| from video_diffusion.utils.model_list import stable_model_list |
|
|
|
|
class ZeroShotText2VideoGenerator:
    """Gradio front end around the diffusers ``TextToVideoZeroPipeline``.

    The pipeline is created lazily on the first generation request and cached
    on the instance, so later requests for the same model reuse it.
    """

    def __init__(self):
        # Cached pipeline; stays None until the first load_model() call.
        self.pipe = None
        # Model id the cached pipeline was built from (None = unknown).
        self._loaded_model_id = None

    def _needs_reload(self, model_id):
        """Return True when the cached pipeline cannot serve ``model_id``."""
        if self.pipe is None:
            return True
        # A pipeline assigned to ``self.pipe`` from outside has no recorded
        # id; keep using it rather than replacing it behind the caller's back.
        loaded_id = getattr(self, "_loaded_model_id", None)
        return loaded_id is not None and loaded_id != model_id

    def load_model(self, model_id):
        """Return a pipeline for ``model_id``, loading it only when needed.

        A new pipeline is built when nothing is cached yet or when the cached
        one was loaded from a different ``model_id``; otherwise the cached
        pipeline is returned unchanged.

        Args:
            model_id: Identifier passed to
                ``TextToVideoZeroPipeline.from_pretrained``.

        Returns:
            The cached pipeline, moved to ``"cuda"`` with xformers attention
            and attention slicing enabled.
        """
        if self._needs_reload(model_id):
            if self.pipe is not None:
                # Drop the previous model before loading the next one so both
                # are not resident on the GPU at the same time.
                self.pipe = None
                self._loaded_model_id = None
                torch.cuda.empty_cache()

            pipe = TextToVideoZeroPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
            ).to("cuda")
            pipe.enable_xformers_memory_efficient_attention()
            pipe.enable_attention_slicing()

            # Cache only after setup succeeded, so a failure above is raised
            # again on the next request instead of leaving a half-configured
            # pipeline behind.
            self.pipe = pipe
            self._loaded_model_id = model_id

        return self.pipe

    def generate_video(
        self,
        prompt,
        negative_prompt,
        model_id,
        height,
        width,
        video_length,
        guidance_scale,
        fps,
        t0,
        t1,
        motion_field_strength_x,
        motion_field_strength_y,
    ):
        """Run the pipeline and write the resulting frames to ``video.mp4``.

        Args:
            prompt: Text prompt forwarded to the pipeline.
            negative_prompt: Negative text prompt forwarded to the pipeline.
            model_id: Model identifier handed to ``load_model``.
            height: Forwarded to the pipeline as ``height``.
            width: Forwarded to the pipeline as ``width``.
            video_length: Forwarded to the pipeline as ``video_length``.
            guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
            fps: Frame rate passed to ``imageio.mimsave``.
            t0: Forwarded to the pipeline as ``t0``.
            t1: Forwarded to the pipeline as ``t1``.
            motion_field_strength_x: Forwarded to the pipeline unchanged.
            motion_field_strength_y: Forwarded to the pipeline unchanged.

        Returns:
            The path of the written file, always ``"video.mp4"``.
        """
        pipe = self.load_model(model_id)
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
            video_length=video_length,
            guidance_scale=guidance_scale,
            t0=t0,
            t1=t1,
            motion_field_strength_x=motion_field_strength_x,
            motion_field_strength_y=motion_field_strength_y,
        ).images

        # Scale each frame by 255 and store it as uint8 for the video writer
        # (presumably the frames are float arrays in [0, 1] -- TODO confirm).
        result = [(r * 255).astype("uint8") for r in result]
        imageio.mimsave("video.mp4", result, fps=fps)
        return "video.mp4"

    @staticmethod
    def app():
        """Build the Gradio widgets and wire the button to ``generate_video``.

        Must be called inside whatever Gradio context the caller wants the
        widgets attached to. Returns nothing.
        """
        # NOTE(review): the widget nesting below was reconstructed from a
        # copy of this file that had lost its indentation; confirm the layout
        # against the rendered UI.
        with gr.Blocks():
            with gr.Row():
                with gr.Column():
                    zero_shot_text2video_prompt = gr.Textbox(
                        lines=1,
                        placeholder="Prompt",
                        show_label=False,
                    )
                    zero_shot_text2video_negative_prompt = gr.Textbox(
                        lines=1,
                        placeholder="Negative Prompt",
                        show_label=False,
                    )
                    zero_shot_text2video_model_id = gr.Dropdown(
                        choices=stable_model_list,
                        label="Stable Model List",
                        value=stable_model_list[0],
                    )
                    with gr.Row():
                        with gr.Column():
                            # step=0.5 so the 7.5 default sits on the slider
                            # grid; every integer value stays selectable.
                            zero_shot_text2video_guidance_scale = gr.Slider(
                                label="Guidance Scale",
                                minimum=1,
                                maximum=15,
                                step=0.5,
                                value=7.5,
                            )
                            zero_shot_text2video_video_length = gr.Slider(
                                label="Video Length",
                                minimum=1,
                                maximum=100,
                                step=1,
                                value=10,
                            )
                            zero_shot_text2video_t0 = gr.Slider(
                                label="Timestep T0",
                                minimum=0,
                                maximum=100,
                                step=1,
                                value=44,
                            )
                            zero_shot_text2video_motion_field_strength_x = gr.Slider(
                                label="Motion Field Strength X",
                                minimum=0,
                                maximum=100,
                                step=1,
                                value=12,
                            )
                            zero_shot_text2video_fps = gr.Slider(
                                label="Fps",
                                minimum=1,
                                maximum=60,
                                step=1,
                                value=10,
                            )
                        with gr.Row():
                            with gr.Column():
                                zero_shot_text2video_height = gr.Slider(
                                    label="Height",
                                    minimum=128,
                                    maximum=1280,
                                    step=32,
                                    value=512,
                                )
                                zero_shot_text2video_width = gr.Slider(
                                    label="Width",
                                    minimum=128,
                                    maximum=1280,
                                    step=32,
                                    value=512,
                                )
                                zero_shot_text2video_t1 = gr.Slider(
                                    label="Timestep T1",
                                    minimum=0,
                                    maximum=100,
                                    step=1,
                                    value=47,
                                )
                                zero_shot_text2video_motion_field_strength_y = gr.Slider(
                                    label="Motion Field Strength Y",
                                    minimum=0,
                                    maximum=100,
                                    step=1,
                                    value=12,
                                )
                    zero_shot_text2video_button = gr.Button(value="Generator")

                with gr.Column():
                    zero_shot_text2video_output = gr.Video(label="Output")

            # The input order must match generate_video's positional
            # parameters (after self).
            zero_shot_text2video_button.click(
                fn=ZeroShotText2VideoGenerator().generate_video,
                inputs=[
                    zero_shot_text2video_prompt,
                    zero_shot_text2video_negative_prompt,
                    zero_shot_text2video_model_id,
                    zero_shot_text2video_height,
                    zero_shot_text2video_width,
                    zero_shot_text2video_video_length,
                    zero_shot_text2video_guidance_scale,
                    zero_shot_text2video_fps,
                    zero_shot_text2video_t0,
                    zero_shot_text2video_t1,
                    zero_shot_text2video_motion_field_strength_x,
                    zero_shot_text2video_motion_field_strength_y,
                ],
                outputs=zero_shot_text2video_output,
            )
|
|