"""StuffMotion AI — image/video → video generation with Stable Video Diffusion.

Gradio app: the user supplies an image (or a video, whose first frame is
used), picks a model variant, FPS and motion strength, and gets back an
MP4 generated by Stable Video Diffusion.
"""

import uuid

import cv2
import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# 🔄 Pipeline is loaded lazily on first generation (fixes slow startup)
# and cached until the user switches models.
pipe = None
current_model = None


def load_model(model_name):
    """Load (or return the cached) SVD pipeline for *model_name*.

    Args:
        model_name: "Fast (SVD)" for the base checkpoint; anything else
            selects the higher-quality XT checkpoint.

    Returns:
        The ready pipeline, or None if loading failed.
    """
    global pipe, current_model
    if current_model == model_name:
        return pipe
    try:
        if model_name == "Fast (SVD)":
            model_id = "stabilityai/stable-video-diffusion-img2vid"
        else:
            model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
        pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        if device == "cuda":
            # enable_model_cpu_offload() manages device placement itself —
            # moving the whole pipeline to CUDA first would defeat the
            # offload, so .to(device) is only applied on the CPU path.
            pipe.enable_attention_slicing()
            pipe.enable_model_cpu_offload()
        else:
            pipe = pipe.to(device)
        current_model = model_name
        return pipe
    except Exception as e:
        # Surface the failure on the console; the UI just gets "no video".
        print("Model load error:", e)
        return None


# 🎥 Extract frame from video
def extract_frame(video_path):
    """Return the first frame of *video_path* as a PIL image, or None if
    the video cannot be opened/read."""
    cap = cv2.VideoCapture(video_path)
    success, frame = cap.read()
    cap.release()
    if success:
        # OpenCV decodes to BGR; PIL expects RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return Image.fromarray(frame)
    return None


def _frame_to_uint8(frame):
    """Normalize one pipeline output frame to an HxWx3 uint8 RGB array.

    Diffusers returns PIL images with the default output_type, and float
    arrays in [0, 1] with output_type="np" — handle both so encoding
    never fails on the frame format.
    """
    if isinstance(frame, Image.Image):
        return np.asarray(frame.convert("RGB"))
    arr = np.asarray(frame)
    if arr.dtype != np.uint8:
        arr = (arr * 255.0).clip(0, 255).astype(np.uint8)
    return arr


def generate_video(image, video, fps, motion_strength, model_choice):
    """Generate an MP4 from an input image (or a video's first frame).

    Args:
        image: optional PIL image from the UI.
        video: optional video file path from the UI (first frame is used).
        fps: output frame rate for the encoded MP4.
        motion_strength: SVD motion_bucket_id (higher = more motion).
        model_choice: model selector string passed to load_model().

    Returns:
        Path of the written MP4, or None on any failure.
    """
    try:
        pipeline = load_model(model_choice)
        if pipeline is None:
            return None

        # Prefer the image input; fall back to the video's first frame.
        if image is not None:
            input_image = image.convert("RGB")
        elif video is not None:
            input_image = extract_frame(video)
            if input_image is None:
                return None
        else:
            return None

        # Resize (⚡ HUGE speed boost).
        input_image = input_image.resize((512, 512))

        output = pipeline(
            input_image,
            num_frames=16,            # ⚡ fewer frames → faster
            decode_chunk_size=4,      # lower peak VRAM while decoding
            motion_bucket_id=int(motion_strength),
        )
        frames = [_frame_to_uint8(f) for f in output.frames[0]]

        filename = f"video_{uuid.uuid4().hex}.mp4"
        imageio.mimsave(filename, frames, fps=fps, codec="libx264")
        return filename
    except Exception as e:
        # Keep the UI alive on any failure; log and return "no video".
        print("Generation error:", e)
        return None


# 🎨 UI
with gr.Blocks() as demo:
    gr.Markdown("# 🎬 StuffMotion AI (FAST + MODEL SELECT)")
    image_input = gr.Image(type="pil", label="🖼️ Image Input")
    video_input = gr.Video(label="🎥 Video Input")
    model_choice = gr.Dropdown(
        ["Fast (SVD)", "High Quality (XT)"],
        value="Fast (SVD)",
        label="🧠 Model",
    )
    fps = gr.Slider(8, 24, value=12, step=1, label="FPS")
    motion = gr.Slider(1, 255, value=100, label="Motion")
    generate_btn = gr.Button("⚡ Generate")
    video_output = gr.Video()

    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, video_input, fps, motion, model_choice],
        outputs=video_output,
    )

demo.launch()