"""Minimal image-to-video generation against a vLLM-Omni endpoint (sync mode). Run from the Cosmos3-Super-Image2Video repo root: python scripts/gen_video.py \ --endpoint \ --prompt-file assets/example_prompt.json \ --image-path assets/example_first_frame.png \ --output-path scripts/output.mp4 """ import argparse import json from pathlib import Path import requests # Fixed generation settings: 16:9 480p, 189 frames @ 24 fps. ASPECT_RATIO = "16,9" WIDTH = 832 HEIGHT = 480 NUM_FRAMES = 189 FPS = 24 def main() -> None: parser = argparse.ArgumentParser(description="Generate one I2V sample (sync mode).") parser.add_argument("--endpoint", required=True, help="vLLM-Omni endpoint base URL.") parser.add_argument("--prompt-file", type=Path, default=Path("assets/example_prompt.json")) parser.add_argument("--image-path", type=Path, default=Path("assets/example_first_frame.png")) parser.add_argument("--output-path", type=Path, default=Path("scripts/output.mp4")) args = parser.parse_args() spec = json.loads(args.prompt_file.read_text(encoding="utf-8")) # Safeguard the metadata and json format prompt = json.loads(spec["prompt"]) prompt["duration"] = f"{int(NUM_FRAMES / FPS)}s" prompt["fps"] = float(round(FPS)) prompt["resolution"] = {"H": HEIGHT, "W": WIDTH} prompt["aspect_ratio"] = ASPECT_RATIO data = { "prompt": json.dumps(prompt, ensure_ascii=False), "negative_prompt": spec["negative_prompt"], "size": f"{WIDTH}x{HEIGHT}", "num_frames": NUM_FRAMES, "fps": FPS, "num_inference_steps": 50, "guidance_scale": 6.0, "flow_shift": 5.0, "extra_params": json.dumps({"use_resolution_template": False, "use_duration_template": False}), } files = {"input_reference": ("input.png", args.image_path.read_bytes(), "image/png")} headers = {"Accept": "video/mp4", "User-Agent": "curl/8.5.0"} response = requests.post(f"{args.endpoint}/v1/videos/sync", data=data, files=files, headers=headers, timeout=(10, 600)) response.raise_for_status() args.output_path.parent.mkdir(parents=True, exist_ok=True) args.output_path.write_bytes(response.content) print(f"Saved video to {args.output_path} ({len(response.content) / (1024 * 1024):.1f} MB)") if __name__ == "__main__": main()