# Author: Jack Wu
# feat: introduce checkpoint mirroring script, strengthen video validation,
# and improve pipeline robustness for masking and compositing.
# Commit: ebe8a5c
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "huggingface_hub>=0.26",
#     "requests>=2.31",
# ]
# ///
| """ | |
| mirror_checkpoints.py | |
| --------------------- | |
| One-off mirror job: copies the three model dependencies for the | |
| Video Watermark Remover Space into JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints | |
| so the Space is insulated from upstream deletion. | |
| Sources mirrored: | |
| 1. Wan-AI/Wan2.1-VACE-14B-diffusers (~75 GB, Apache-2.0) β vace-14b/ | |
| 2. lightx2v/Wan2.1-Distill-Loras (single LoRA file) β loras/ | |
| 3. big-lama.pt from GitHub releases (~196 MB, Apache-2.0) β lama/ | |
| Strategy | |
| -------- | |
| Per-file streaming: download β upload β delete. Disk usage at any moment | |
| is ~one file (max ~5 GB for a single VACE transformer shard), so this fits | |
| on cpu-basic / cpu-upgrade Jobs without ever holding the full 75 GB locally. | |
| """ | |
import os
import sys
from pathlib import Path

import requests
from huggingface_hub import HfApi, hf_hub_download, list_repo_files

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
DEST_REPO = "JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints"

# A write token is mandatory: every mirror operation commits into DEST_REPO.
TOKEN = os.getenv("HF_TOKEN")
if not TOKEN:
    sys.exit("HF_TOKEN env var not set; pass via `--secrets HF_TOKEN=...`")

# Scratch directory for the per-file download -> upload -> delete cycle.
WORK = Path("/tmp/mirror")
WORK.mkdir(parents=True, exist_ok=True)

api = HfApi(token=TOKEN)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def stream_repo(
    src_repo: str,
    dest_prefix: str,
    src_type: str = "model",
    exclude_globs: list[str] | None = None,
) -> None:
    """Mirror every file in *src_repo* under ``dest_prefix/`` in DEST_REPO.

    Files are processed one at a time (download -> upload -> delete) so the
    peak disk footprint stays at roughly one file regardless of repo size.

    Args:
        src_repo: Source repo id on the Hugging Face Hub.
        dest_prefix: Folder prefix inside DEST_REPO to mirror into.
        src_type: Hub repo type of the source ("model", "dataset", ...).
        exclude_globs: Optional glob patterns; matching paths are skipped.
    """
    files = list_repo_files(src_repo, repo_type=src_type, token=TOKEN)
    exclude = exclude_globs or []
    files = [f for f in files if not any(Path(f).match(g) for g in exclude)]
    # NOTE(review): log arrow was mojibake in the original ("β"); restored
    # to a plain "->" so console output is ASCII-safe.
    print(f"\n=== {src_repo} -> {dest_prefix}/ ({len(files)} files) ===", flush=True)
    for i, fname in enumerate(files, 1):
        print(f"  [{i:>3}/{len(files)}] {fname}", flush=True)
        local = hf_hub_download(
            repo_id=src_repo,
            repo_type=src_type,
            filename=fname,
            local_dir=str(WORK),
            token=TOKEN,
        )
        try:
            api.upload_file(
                path_or_fileobj=local,
                path_in_repo=f"{dest_prefix}/{fname}",
                repo_id=DEST_REPO,
                repo_type="model",
                commit_message=f"Mirror {src_repo}: {fname}",
            )
        finally:
            # Free the disk even if the upload raised, so a retried Job does
            # not start with a full scratch dir. (Original skipped cleanup
            # entirely on upload failure.)
            Path(local).unlink(missing_ok=True)
def stream_url(url: str, dest_path_in_repo: str, commit_message: str) -> None:
    """Download a single file from an arbitrary URL, push to DEST_REPO, delete.

    Args:
        url: Direct-download URL of the file.
        dest_path_in_repo: Target path inside DEST_REPO.
        commit_message: Commit message to use for the upload.

    Raises:
        requests.HTTPError: If the download returns a non-2xx status.
    """
    fname = Path(dest_path_in_repo).name
    # NOTE(review): log arrow was mojibake in the original ("β"); restored
    # to a plain "->".
    print(f"\n=== {url} -> {dest_path_in_repo} ===", flush=True)
    local = WORK / fname
    try:
        with requests.get(url, stream=True, timeout=300) as r:
            r.raise_for_status()
            with open(local, "wb") as fp:
                for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MB chunks
                    fp.write(chunk)
        api.upload_file(
            path_or_fileobj=str(local),
            path_in_repo=dest_path_in_repo,
            repo_id=DEST_REPO,
            repo_type="model",
            commit_message=commit_message,
        )
    finally:
        # Remove the scratch copy even when the download or upload fails, so
        # a partial file never lingers on the Job's small disk. (Original
        # left partial downloads behind on any mid-stream error.)
        local.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Run the three mirror jobs, largest source first."""
    # 1. VACE-14B (largest -- do first while disk is freshest)
    stream_repo(
        "Wan-AI/Wan2.1-VACE-14B-diffusers",
        dest_prefix="vace-14b",
        exclude_globs=["assets/*", ".gitattributes"],
    )
    # 2. 4-step distill LoRA (single file)
    stream_repo(
        "lightx2v/Wan2.1-Distill-Loras",
        dest_prefix="loras",
        exclude_globs=[
            "*.md",
            ".gitattributes",
            # Pull only the rank-64 t2v 4-step LoRA -- matches vace.py 8-step plan
            "*i2v*",
            "*rank32*",
            "*rank128*",
        ],
    )
    # 3. LaMa from GitHub release
    stream_url(
        url="https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt",
        dest_path_in_repo="lama/big-lama.pt",
        commit_message="Mirror big-lama.pt from simple-lama-inpainting v0.1.0 GitHub release",
    )
    # NOTE(review): leading glyph was mojibake in the original ("β", likely a
    # checkmark); replaced with plain ASCII text.
    print("\nAll mirrors complete.")


if __name__ == "__main__":
    main()