# mingyuliutw's picture
# Super-squash branch 'main' using huggingface_hub
# 8889131
# raw
# history blame
# 2.39 kB
"""Minimal image-to-video generation against a vLLM-Omni endpoint (sync mode).
Run from the Cosmos3-Super-Image2Video repo root:
python scripts/gen_video.py \
--endpoint <endpoint-url> \
--prompt-file assets/example_prompt.json \
--image-path assets/example_first_frame.png \
--output-path scripts/output.mp4
"""
import argparse
import json
from pathlib import Path
import requests
# Fixed generation settings: 16:9 480p, 189 frames @ 24 fps.
# NOTE(review): 832x480 is not an exact 16:9 ratio (832 / 480 is about 1.73,
# 16 / 9 is about 1.78) -- presumably the closest size the model supports;
# confirm before changing either value.
# ASPECT_RATIO is copied verbatim into the prompt metadata by main().
# NOTE(review): the comma-separated "16,9" form looks deliberate (assumed to be
# the format the prompt schema expects) -- confirm before "fixing" it to "16:9".
ASPECT_RATIO = "16,9"
# Frame size in pixels; sent both as the "size" form field ("832x480") and as
# the prompt's "resolution" entry.
WIDTH = 832
HEIGHT = 480
# 189 frames at 24 fps is 7.875 seconds of video.
NUM_FRAMES = 189
FPS = 24
def main() -> None:
    """Generate one image-to-video sample through the synchronous endpoint.

    Steps:
      1. Parse the command-line arguments.
      2. Load the prompt spec (a JSON file with ``prompt`` and
         ``negative_prompt`` keys) and overwrite the prompt's generation
         metadata with the fixed module-level settings.
      3. POST the request, together with the first-frame image, to
         ``<endpoint>/v1/videos/sync``.
      4. Write the response body to ``--output-path``.

    Raises:
        OSError: If the prompt file or image cannot be read, or the output
            cannot be written.
        KeyError: If the prompt spec lacks ``prompt`` or ``negative_prompt``.
        json.JSONDecodeError: If the prompt file, or the ``prompt`` string
            inside it, is not valid JSON.
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
            The message includes an excerpt of the response body.
    """
    parser = argparse.ArgumentParser(description="Generate one I2V sample (sync mode).")
    parser.add_argument("--endpoint", required=True, help="vLLM-Omni endpoint base URL.")
    parser.add_argument("--prompt-file", type=Path, default=Path("assets/example_prompt.json"))
    parser.add_argument("--image-path", type=Path, default=Path("assets/example_first_frame.png"))
    parser.add_argument("--output-path", type=Path, default=Path("scripts/output.mp4"))
    args = parser.parse_args()

    spec = json.loads(args.prompt_file.read_text(encoding="utf-8"))

    # The spec's "prompt" value is itself a JSON-encoded string. Decode it,
    # force the metadata fields to match the fixed settings actually sent in
    # the request, then re-encode it below.
    prompt = json.loads(spec["prompt"])
    # int() truncates, so 189 / 24 = 7.875 is reported as "7s".
    prompt["duration"] = f"{int(NUM_FRAMES / FPS)}s"
    prompt["fps"] = float(round(FPS))
    prompt["resolution"] = {"H": HEIGHT, "W": WIDTH}
    prompt["aspect_ratio"] = ASPECT_RATIO

    data = {
        "prompt": json.dumps(prompt, ensure_ascii=False),
        "negative_prompt": spec["negative_prompt"],
        "size": f"{WIDTH}x{HEIGHT}",
        "num_frames": NUM_FRAMES,
        "fps": FPS,
        "num_inference_steps": 50,
        "guidance_scale": 6.0,
        "flow_shift": 5.0,
        # NOTE(review): presumably tells the server not to apply its own
        # resolution/duration prompt templates, since the metadata is already
        # set above -- confirm against the endpoint's documentation.
        "extra_params": json.dumps({"use_resolution_template": False, "use_duration_template": False}),
    }
    # The image is always uploaded as "input.png" with an image/png content
    # type, whatever the extension of --image-path is.
    files = {"input_reference": ("input.png", args.image_path.read_bytes(), "image/png")}
    # NOTE(review): the curl User-Agent looks like a workaround for a gateway
    # that rejects the default python-requests agent -- confirm it is needed.
    headers = {"Accept": "video/mp4", "User-Agent": "curl/8.5.0"}

    # Drop trailing slashes so an endpoint given as "http://host/" does not
    # produce "http://host//v1/videos/sync".
    base_url = args.endpoint.rstrip("/")
    # timeout=(connect, read): 10 s to connect, up to 600 s for the response.
    response = requests.post(
        f"{base_url}/v1/videos/sync",
        data=data,
        files=files,
        headers=headers,
        timeout=(10, 600),
    )
    try:
        response.raise_for_status()
    except requests.HTTPError as err:
        # raise_for_status() reports only the status line; attach an excerpt
        # of the body so whatever the server sent back is visible.
        raise requests.HTTPError(
            f"{err}; response body: {response.text[:500]!r}",
            response=response,
        ) from err

    args.output_path.parent.mkdir(parents=True, exist_ok=True)
    args.output_path.write_bytes(response.content)
    print(f"Saved video to {args.output_path} ({len(response.content) / (1024 * 1024):.1f} MB)")


if __name__ == "__main__":
    main()