# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "huggingface_hub>=0.26",
#   "requests>=2.31",
# ]
# ///
"""
mirror_checkpoints.py
---------------------
One-off mirror job: copies the three model dependencies for the Video
Watermark Remover Space into
JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints so the Space is
insulated from upstream deletion.

Sources mirrored:
1. Wan-AI/Wan2.1-VACE-14B-diffusers (~75 GB, Apache-2.0)  → vace-14b/
2. lightx2v/Wan2.1-Distill-Loras (single LoRA file)       → loras/
3. big-lama.pt from GitHub releases (~196 MB, Apache-2.0) → lama/

Strategy
--------
Per-file streaming: download → upload → delete.  Disk usage at any moment is
~one file (max ~5 GB for a single VACE transformer shard), so this fits on
cpu-basic / cpu-upgrade Jobs without ever holding the full 75 GB locally.

Resumability: before mirroring, the destination repo is listed once and any
file already present there is skipped, so a crashed or preempted Job can
simply be re-run and picks up where it left off instead of re-transferring
everything.
"""

import os
import sys
from pathlib import Path

import requests
from huggingface_hub import HfApi, hf_hub_download, list_repo_files

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
DEST_REPO = "JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints"

TOKEN = os.environ.get("HF_TOKEN")
if not TOKEN:
    sys.exit("HF_TOKEN env var not set; pass via `--secrets HF_TOKEN=...`")

WORK = Path("/tmp/mirror")
WORK.mkdir(parents=True, exist_ok=True)

api = HfApi(token=TOKEN)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _existing_dest_files() -> set[str]:
    """Return the set of paths already present in DEST_REPO.

    Best-effort: if the destination repo cannot be listed (e.g. it does not
    exist yet on the very first run), an empty set is returned and the job
    behaves exactly like a fresh mirror.
    """
    try:
        return set(list_repo_files(DEST_REPO, repo_type="model", token=TOKEN))
    except Exception as exc:  # noqa: BLE001 — best-effort resume, never fatal
        print(f"(could not list {DEST_REPO}: {exc}; assuming empty)", flush=True)
        return set()


def stream_repo(
    src_repo: str,
    dest_prefix: str,
    src_type: str = "model",
    exclude_globs: list[str] | None = None,
) -> None:
    """Mirror every file in *src_repo* under *dest_prefix* in DEST_REPO.

    Files matching any pattern in *exclude_globs* are skipped, as are files
    already present at the destination (resume support).  Each file is
    downloaded to WORK, uploaded as its own commit, then deleted locally so
    disk usage stays at ~one file.

    Parameters
    ----------
    src_repo:      source repo id on the Hub.
    dest_prefix:   path prefix inside DEST_REPO (no trailing slash).
    src_type:      Hub repo type of the source ("model", "dataset", ...).
    exclude_globs: ``Path.match``-style patterns to skip, or None.
    """
    files = list_repo_files(src_repo, repo_type=src_type, token=TOKEN)
    exclude = exclude_globs or []
    files = [f for f in files if not any(Path(f).match(g) for g in exclude)]

    done = _existing_dest_files()
    print(f"\n=== {src_repo} → {dest_prefix}/ ({len(files)} files) ===", flush=True)
    for i, fname in enumerate(files, 1):
        dest_path = f"{dest_prefix}/{fname}"
        if dest_path in done:
            print(f" [{i:>3}/{len(files)}] {fname} (already mirrored, skip)", flush=True)
            continue
        print(f" [{i:>3}/{len(files)}] {fname}", flush=True)
        local = hf_hub_download(
            repo_id=src_repo,
            repo_type=src_type,
            filename=fname,
            local_dir=str(WORK),
            token=TOKEN,
        )
        api.upload_file(
            path_or_fileobj=local,
            path_in_repo=dest_path,
            repo_id=DEST_REPO,
            repo_type="model",
            commit_message=f"Mirror {src_repo}: {fname}",
        )
        # Free disk immediately — the whole point of per-file streaming.
        Path(local).unlink(missing_ok=True)


def stream_url(url: str, dest_path_in_repo: str, commit_message: str) -> None:
    """Download a single file from an arbitrary URL, push to DEST_REPO, delete.

    Skipped if *dest_path_in_repo* already exists at the destination.  A
    partially downloaded file is removed on failure so a retry on a small
    disk is not starved of space.
    """
    if dest_path_in_repo in _existing_dest_files():
        print(f"\n=== {dest_path_in_repo} already mirrored, skip ===", flush=True)
        return

    fname = Path(dest_path_in_repo).name
    print(f"\n=== {url} → {dest_path_in_repo} ===", flush=True)
    local = WORK / fname
    try:
        with requests.get(url, stream=True, timeout=300) as r:
            r.raise_for_status()
            with open(local, "wb") as fp:
                for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MB chunks
                    if chunk:  # filter out keep-alive chunks
                        fp.write(chunk)
    except Exception:
        # Don't leave a partial download occupying the tiny work disk.
        local.unlink(missing_ok=True)
        raise

    api.upload_file(
        path_or_fileobj=str(local),
        path_in_repo=dest_path_in_repo,
        repo_id=DEST_REPO,
        repo_type="model",
        commit_message=commit_message,
    )
    local.unlink(missing_ok=True)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Run the three mirror jobs in order, largest first."""
    # 1. VACE-14B (largest — do first while disk is freshest)
    stream_repo(
        "Wan-AI/Wan2.1-VACE-14B-diffusers",
        dest_prefix="vace-14b",
        exclude_globs=["assets/*", ".gitattributes"],
    )

    # 2. 4-step distill LoRA (single file)
    stream_repo(
        "lightx2v/Wan2.1-Distill-Loras",
        dest_prefix="loras",
        exclude_globs=[
            "*.md",
            ".gitattributes",
            # Pull only the rank-64 t2v 4-step LoRA — matches vace.py 8-step plan
            "*i2v*",
            "*rank32*",
            "*rank128*",
        ],
    )

    # 3. LaMa from GitHub release
    stream_url(
        url="https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt",
        dest_path_in_repo="lama/big-lama.pt",
        commit_message="Mirror big-lama.pt from simple-lama-inpainting v0.1.0 GitHub release",
    )

    print("\n✅ All mirrors complete.")


if __name__ == "__main__":
    main()