Instructions to use nvidia/Cosmos3-Super-Image2Video with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use nvidia/Cosmos3-Super-Image2Video with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Diffusers
How to use nvidia/Cosmos3-Super-Image2Video with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image, export_to_video

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("nvidia/Cosmos3-Super-Image2Video", dtype=torch.bfloat16, device_map="cuda")
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "output.mp4")
- Notebooks
- Google Colab
- Kaggle
| """Minimal image-to-video generation against a vLLM-Omni endpoint (sync mode). | |
| Run from the Cosmos3-Super-Image2Video repo root: | |
| python scripts/gen_video.py \ | |
| --endpoint <endpoint-url> \ | |
| --prompt-file assets/example_prompt.json \ | |
| --image-path assets/example_first_frame.png \ | |
| --output-path scripts/output.mp4 | |
| """ | |
| import argparse | |
| import json | |
| from pathlib import Path | |
| import requests | |
# Fixed generation settings: 16:9 480p, 189 frames @ 24 fps.
ASPECT_RATIO = "16,9"
WIDTH = 832
HEIGHT = 480
NUM_FRAMES = 189
FPS = 24


def build_form_data(spec: dict) -> dict:
    """Build the form fields for one sync image-to-video request.

    The structured prompt from ``spec`` is stamped with the fixed generation
    settings defined at module level (duration, fps, resolution, aspect
    ratio) and re-serialized to a JSON string.

    Args:
        spec: Parsed prompt file. Must contain ``"prompt"`` (a JSON object,
            either serialized as a string or already decoded) and
            ``"negative_prompt"``.

    Returns:
        The dictionary passed as ``data=`` to ``requests.post``.

    Raises:
        KeyError: If ``"prompt"`` or ``"negative_prompt"`` is missing.
        ValueError: If the prompt is not valid JSON or is not a JSON object.
    """
    raw_prompt = spec["prompt"]
    # The prompt is normally stored as a JSON string; also accept a prompt
    # file that stores it as a nested object instead of failing in json.loads.
    if isinstance(raw_prompt, (str, bytes, bytearray)):
        prompt = json.loads(raw_prompt)
    else:
        prompt = raw_prompt
    if not isinstance(prompt, dict):
        raise ValueError(
            f"'prompt' must be a JSON object, got {type(prompt).__name__}"
        )

    # Work on a copy so the caller's spec is never mutated.
    prompt = dict(prompt)
    # Safeguard the metadata: always overwrite these keys with the fixed
    # settings so the prompt agrees with the request parameters below.
    # Duration is whole seconds, truncated (189 frames @ 24 fps -> "7s").
    prompt["duration"] = f"{NUM_FRAMES // FPS}s"
    prompt["fps"] = float(round(FPS))
    prompt["resolution"] = {"H": HEIGHT, "W": WIDTH}
    prompt["aspect_ratio"] = ASPECT_RATIO

    return {
        "prompt": json.dumps(prompt, ensure_ascii=False),
        "negative_prompt": spec["negative_prompt"],
        "size": f"{WIDTH}x{HEIGHT}",
        "num_frames": NUM_FRAMES,
        "fps": FPS,
        "num_inference_steps": 50,
        "guidance_scale": 6.0,
        "flow_shift": 5.0,
        "extra_params": json.dumps(
            {"use_resolution_template": False, "use_duration_template": False}
        ),
    }


def main() -> None:
    """Generate one image-to-video sample and save it as an MP4 file.

    Reads the prompt spec and the first-frame image given on the command
    line, posts them to ``<endpoint>/v1/videos/sync`` and writes the response
    body to ``--output-path``, creating parent directories as needed.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        KeyError, ValueError: If the prompt file is malformed
            (see ``build_form_data``).
    """
    parser = argparse.ArgumentParser(description="Generate one I2V sample (sync mode).")
    parser.add_argument("--endpoint", required=True, help="vLLM-Omni endpoint base URL.")
    parser.add_argument("--prompt-file", type=Path, default=Path("assets/example_prompt.json"))
    parser.add_argument("--image-path", type=Path, default=Path("assets/example_first_frame.png"))
    parser.add_argument("--output-path", type=Path, default=Path("scripts/output.mp4"))
    args = parser.parse_args()

    spec = json.loads(args.prompt_file.read_text(encoding="utf-8"))
    data = build_form_data(spec)

    files = {"input_reference": ("input.png", args.image_path.read_bytes(), "image/png")}
    # NOTE(review): the curl User-Agent is presumably required by the endpoint
    # or a gateway in front of it -- confirm before changing.
    headers = {"Accept": "video/mp4", "User-Agent": "curl/8.5.0"}

    # Strip trailing slashes so a base URL such as "http://host:8000/" does
    # not produce "http://host:8000//v1/videos/sync".
    url = f"{args.endpoint.rstrip('/')}/v1/videos/sync"
    # timeout is requests' (connect, read) pair, in seconds.
    response = requests.post(url, data=data, files=files, headers=headers, timeout=(10, 600))
    response.raise_for_status()

    args.output_path.parent.mkdir(parents=True, exist_ok=True)
    args.output_path.write_bytes(response.content)
    print(f"Saved video to {args.output_path} ({len(response.content) / (1024 * 1024):.1f} MB)")


if __name__ == "__main__":
    main()