nvidia
/

Cosmos3-Super-Image2Video

video-generation

Model card Files Files and versions

Cosmos3-Super-Image2Video / scripts /gen_video.py

mingyuliutw's picture

Super-squash branch 'main' using huggingface_hub

8889131 about 24 hours ago

history blame contribute delete

2.39 kB

	"""Minimal image-to-video generation against a vLLM-Omni endpoint (sync mode).

	Run from the Cosmos3-Super-Image2Video repo root:

	python scripts/gen_video.py \
	--endpoint <endpoint-url> \
	--prompt-file assets/example_prompt.json \
	--image-path assets/example_first_frame.png \
	--output-path scripts/output.mp4
	"""

	import argparse
	import json
	from pathlib import Path

	import requests

	# Hard-wired generation settings shared by the whole script:
	# a 16:9 clip at 480p (832x480), 189 frames long, played back at 24 fps.
	WIDTH, HEIGHT = 832, 480
	NUM_FRAMES, FPS = 189, 24
	# Written with a comma; main() copies this string verbatim into the
	# prompt's "aspect_ratio" field.
	ASPECT_RATIO = "16,9"


	def main() -> None:
	    """Generate one image-to-video sample through the synchronous endpoint.

	    Reads the prompt spec and the first-frame image named on the command
	    line, posts them as a multipart form to ``<endpoint>/v1/videos/sync``
	    and writes the returned bytes to ``--output-path``, creating parent
	    directories as needed.

	    The prompt file must be a JSON object with a ``negative_prompt`` entry
	    and a ``prompt`` entry that is itself a JSON-encoded object. The
	    ``duration``, ``fps``, ``resolution`` and ``aspect_ratio`` fields of that
	    object are overwritten from the module-level generation constants
	    before the request is sent.

	    Raises:
	        ValueError: If the embedded prompt does not decode to a JSON object.
	        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
	        RuntimeError: If the endpoint answers successfully with an empty body.
	    """
	    import sys  # Local import: only needed to report server errors on stderr.

	    parser = argparse.ArgumentParser(description="Generate one I2V sample (sync mode).")
	    parser.add_argument("--endpoint", required=True, help="vLLM-Omni endpoint base URL.")
	    parser.add_argument("--prompt-file", type=Path, default=Path("assets/example_prompt.json"))
	    parser.add_argument("--image-path", type=Path, default=Path("assets/example_first_frame.png"))
	    parser.add_argument("--output-path", type=Path, default=Path("scripts/output.mp4"))
	    args = parser.parse_args()

	    spec = json.loads(args.prompt_file.read_text(encoding="utf-8"))
	    # The prompt is stored as a JSON string inside the spec. Decoding it
	    # validates the JSON and lets the metadata fields below be forced to the
	    # fixed generation settings before it is re-serialised.
	    prompt = json.loads(spec["prompt"])
	    if not isinstance(prompt, dict):
	        raise ValueError(
	            f"'prompt' in {args.prompt_file} must encode a JSON object, "
	            f"got {type(prompt).__name__}"
	        )
	    prompt["duration"] = f"{int(NUM_FRAMES / FPS)}s"  # Whole seconds, truncated.
	    prompt["fps"] = float(round(FPS))
	    prompt["resolution"] = {"H": HEIGHT, "W": WIDTH}
	    prompt["aspect_ratio"] = ASPECT_RATIO

	    data = {
	        "prompt": json.dumps(prompt, ensure_ascii=False),
	        "negative_prompt": spec["negative_prompt"],
	        "size": f"{WIDTH}x{HEIGHT}",
	        "num_frames": NUM_FRAMES,
	        "fps": FPS,
	        "num_inference_steps": 50,
	        "guidance_scale": 6.0,
	        "flow_shift": 5.0,
	        "extra_params": json.dumps({"use_resolution_template": False, "use_duration_template": False}),
	    }
	    # NOTE(review): the image part is always labelled input.png / image/png,
	    # whatever file --image-path actually points at.
	    files = {"input_reference": ("input.png", args.image_path.read_bytes(), "image/png")}
	    # NOTE(review): the User-Agent imitates curl; presumably something in
	    # front of the endpoint depends on it -- confirm before changing.
	    headers = {"Accept": "video/mp4", "User-Agent": "curl/8.5.0"}

	    # Tolerate a trailing slash on --endpoint so the URL never contains "//v1/...".
	    url = f"{args.endpoint.rstrip('/')}/v1/videos/sync"
	    # timeout is (connect, read) in seconds; the long read timeout covers the
	    # time the server spends generating before it starts to answer.
	    response = requests.post(url, data=data, files=files, headers=headers, timeout=(10, 600))
	    if not response.ok:
	        # The exception raised below does not include the response body, so
	        # print the server's explanation first.
	        print(
	            f"Request to {url} failed with HTTP {response.status_code}: {response.text[:2000]}",
	            file=sys.stderr,
	        )
	    response.raise_for_status()

	    video = response.content
	    if not video:
	        # Refuse to write a zero-byte file and report it as a saved video.
	        raise RuntimeError(f"{url} returned an empty response body; no video was written.")

	    args.output_path.parent.mkdir(parents=True, exist_ok=True)
	    args.output_path.write_bytes(video)
	    print(f"Saved video to {args.output_path} ({len(video) / (1024 * 1024):.1f} MB)")


	# Script entry point: run the generation only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()