# /// script
# requires-python = ">=3.10"
# dependencies = [
# "huggingface_hub>=0.26",
# "requests>=2.31",
# ]
# ///
"""
mirror_checkpoints.py
---------------------
One-off mirror job: copies the three model dependencies for the
Video Watermark Remover Space into JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints
so the Space is insulated from upstream deletion.
Sources mirrored:
1. Wan-AI/Wan2.1-VACE-14B-diffusers (~75 GB, Apache-2.0) → vace-14b/
2. lightx2v/Wan2.1-Distill-Loras (single LoRA file) → loras/
3. big-lama.pt from GitHub releases (~196 MB, Apache-2.0) → lama/
Strategy
--------
Per-file streaming: download → upload → delete. Disk usage at any moment
is ~one file (max ~5 GB for a single VACE transformer shard), so this fits
on cpu-basic / cpu-upgrade Jobs without ever holding the full 75 GB locally.
"""
import os
import sys
from pathlib import Path
import requests
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Destination model repo on the Hub that receives every mirrored file.
DEST_REPO = "JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints"

# Access token taken from the environment; the job stops immediately when it
# is missing or empty.
TOKEN = os.getenv("HF_TOKEN")
if not TOKEN:
    raise SystemExit("HF_TOKEN env var not set; pass via `--secrets HF_TOKEN=...`")

# Scratch directory used for the single file that is in flight at any moment.
WORK = Path("/tmp/mirror")
os.makedirs(WORK, exist_ok=True)

# Shared Hub client used by the helpers below for all uploads.
api = HfApi(token=TOKEN)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def stream_repo(
    src_repo: str,
    dest_prefix: str,
    src_type: str = "model",
    exclude_globs: list[str] | None = None,
) -> None:
    """Mirror every file in src_repo under dest_prefix in DEST_REPO.

    Files are processed one at a time: each is downloaded into WORK,
    uploaded to DEST_REPO as its own commit, and then removed locally, so
    at most one source file is kept on disk at any moment.

    Args:
        src_repo: Hub repo id to copy from.
        dest_prefix: Folder inside DEST_REPO that receives the files; the
            source-relative path is appended to it.
        src_type: Hub repo type of the source (passed as ``repo_type``).
        exclude_globs: Patterns tested with ``Path.match``; a file matching
            any of them is skipped. ``None`` means nothing is excluded.

    Any exception from listing, downloading or uploading propagates to the
    caller; the local copy of the file being uploaded is removed first.
    """
    files = list_repo_files(src_repo, repo_type=src_type, token=TOKEN)
    exclude = exclude_globs or []
    # NOTE: Path.match anchors relative patterns on the right, so a
    # single-component pattern such as "*i2v*" is compared with the file
    # name only, never with parent directory names.
    files = [f for f in files if not any(Path(f).match(g) for g in exclude)]
    total = len(files)
    print(f"\n=== {src_repo} → {dest_prefix}/ ({total} files) ===", flush=True)
    for i, fname in enumerate(files, 1):
        print(f" [{i:>3}/{total}] {fname}", flush=True)
        local = hf_hub_download(
            repo_id=src_repo,
            repo_type=src_type,
            filename=fname,
            local_dir=str(WORK),
            token=TOKEN,
        )
        try:
            api.upload_file(
                path_or_fileobj=local,
                path_in_repo=f"{dest_prefix}/{fname}",
                repo_id=DEST_REPO,
                repo_type="model",
                commit_message=f"Mirror {src_repo}: {fname}",
            )
        finally:
            # Free the disk space even when the upload fails, so a failed
            # run does not leave a multi-GB shard behind in WORK.
            Path(local).unlink(missing_ok=True)
def stream_url(url: str, dest_path_in_repo: str, commit_message: str) -> None:
    """Download a single file from an arbitrary URL, push to DEST_REPO, delete.

    The response body is streamed to WORK/<basename of dest_path_in_repo>
    in 1 MiB chunks, uploaded to DEST_REPO at dest_path_in_repo, and the
    local copy is removed afterwards.

    Args:
        url: HTTP(S) location of the file to fetch.
        dest_path_in_repo: Target path inside DEST_REPO; its final component
            is also used as the temporary local file name.
        commit_message: Commit message for the upload.

    A non-success HTTP status raises via ``raise_for_status``; that and any
    download or upload error propagate to the caller after the local file
    has been removed.
    """
    fname = Path(dest_path_in_repo).name
    print(f"\n=== {url} → {dest_path_in_repo} ===", flush=True)
    local = WORK / fname
    try:
        with requests.get(url, stream=True, timeout=300) as r:
            r.raise_for_status()
            with open(local, "wb") as fp:
                for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MB chunks
                    fp.write(chunk)
        api.upload_file(
            path_or_fileobj=str(local),
            path_in_repo=dest_path_in_repo,
            repo_id=DEST_REPO,
            repo_type="model",
            commit_message=commit_message,
        )
    finally:
        # Remove the (possibly partial) download whether or not the
        # transfer and upload succeeded.
        local.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Run the three mirror steps in order and print a completion message.

    Each step is a call to stream_repo or stream_url; an exception from any
    step propagates and stops the remaining steps.
    """
    # 1. VACE-14B (largest — do first while disk is freshest)
    stream_repo(
        "Wan-AI/Wan2.1-VACE-14B-diffusers",
        dest_prefix="vace-14b",
        exclude_globs=["assets/*", ".gitattributes"],
    )
    # 2. 4-step distill LoRA (single file)
    stream_repo(
        "lightx2v/Wan2.1-Distill-Loras",
        dest_prefix="loras",
        exclude_globs=[
            "*.md",
            ".gitattributes",
            # Pull only the rank-64 t2v 4-step LoRA — matches vace.py 8-step plan
            "*i2v*",
            "*rank32*",
            "*rank128*",
        ],
    )
    # 3. LaMa from GitHub release
    stream_url(
        url="https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt",
        dest_path_in_repo="lama/big-lama.pt",
        commit_message="Mirror big-lama.pt from simple-lama-inpainting v0.1.0 GitHub release",
    )
    print("\n✅ All mirrors complete.")


if __name__ == "__main__":
    main()
|