File size: 4,608 Bytes
ebe8a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "huggingface_hub>=0.26",
#   "requests>=2.31",
# ]
# ///
"""
mirror_checkpoints.py
---------------------
One-off mirror job: copies the three model dependencies for the
Video Watermark Remover Space into JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints
so the Space is insulated from upstream deletion.

Sources mirrored:
  1. Wan-AI/Wan2.1-VACE-14B-diffusers     (~75 GB, Apache-2.0) → vace-14b/
  2. lightx2v/Wan2.1-Distill-Loras        (single LoRA file)   → loras/
  3. big-lama.pt from GitHub releases     (~196 MB, Apache-2.0) → lama/

Strategy
--------
Per-file streaming: download → upload → delete. Disk usage at any moment
is ~one file (max ~5 GB for a single VACE transformer shard), so this fits
on cpu-basic / cpu-upgrade Jobs without ever holding the full 75 GB locally.
"""

import os
import sys
from pathlib import Path

import requests
from huggingface_hub import HfApi, hf_hub_download, list_repo_files

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Hub model repo that receives every mirrored file.
DEST_REPO = "JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints"

# Hub access token taken from the environment; a missing or empty value
# aborts the script before any directory is created or any client is built.
TOKEN = os.getenv("HF_TOKEN")
if not TOKEN:
    sys.exit("HF_TOKEN env var not set; pass via `--secrets HF_TOKEN=...`")

# Scratch directory for the file currently being transferred.
WORK = Path("/tmp") / "mirror"
WORK.mkdir(exist_ok=True, parents=True)

# Shared Hub client used by the upload helpers below.
api = HfApi(token=TOKEN)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def stream_repo(
    src_repo: str,
    dest_prefix: str,
    src_type: str = "model",
    exclude_globs: list[str] | None = None,
) -> None:
    """Mirror every non-excluded file of ``src_repo`` under ``dest_prefix`` in DEST_REPO.

    Files are handled one at a time: each is downloaded into WORK, uploaded
    to DEST_REPO as its own commit, and then removed from local disk before
    the next file is fetched.

    Args:
        src_repo: Hub repo id to copy from.
        dest_prefix: Folder inside DEST_REPO; each file is uploaded to
            ``{dest_prefix}/{fname}``.
        src_type: Repo type passed to the listing and download calls for
            the source repo.
        exclude_globs: Patterns tested with ``Path.match`` against each
            repo-relative file name; a file matching any pattern is skipped.
            ``None`` (the default) excludes nothing.

    Raises:
        Whatever the listing, download or upload call raises is propagated
        unchanged. When the upload fails, the local copy of the file in
        flight is still deleted first.
    """
    files = list_repo_files(src_repo, repo_type=src_type, token=TOKEN)
    exclude = exclude_globs or []
    files = [f for f in files if not any(Path(f).match(g) for g in exclude)]
    total = len(files)
    print(f"\n=== {src_repo} β†’ {dest_prefix}/ ({total} files) ===", flush=True)

    for i, fname in enumerate(files, 1):
        print(f"  [{i:>3}/{total}] {fname}", flush=True)
        local = hf_hub_download(
            repo_id=src_repo,
            repo_type=src_type,
            filename=fname,
            local_dir=str(WORK),
            token=TOKEN,
        )
        try:
            api.upload_file(
                path_or_fileobj=local,
                path_in_repo=f"{dest_prefix}/{fname}",
                repo_id=DEST_REPO,
                repo_type="model",
                commit_message=f"Mirror {src_repo}: {fname}",
            )
        finally:
            # Delete the local copy even when the upload raises, so a failed
            # transfer does not leave a large file behind in WORK.
            Path(local).unlink(missing_ok=True)


def stream_url(url: str, dest_path_in_repo: str, commit_message: str) -> None:
    """Download one file from ``url``, upload it to DEST_REPO, then delete it.

    The response body is written to ``WORK/<basename of dest_path_in_repo>``
    in 1 MiB chunks, so the whole file is never held in memory.

    Args:
        url: Address of the file to fetch.
        dest_path_in_repo: Target path inside DEST_REPO; its final component
            is also used as the temporary local file name.
        commit_message: Commit message for the upload.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        Any other exception from the download or the upload is propagated
        unchanged. In every case the local file, complete or partial, is
        removed before the exception leaves this function.
    """
    fname = Path(dest_path_in_repo).name
    print(f"\n=== {url} β†’ {dest_path_in_repo} ===", flush=True)
    local = WORK / fname
    try:
        with requests.get(url, stream=True, timeout=300) as r:
            r.raise_for_status()
            with open(local, "wb") as fp:
                for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                    fp.write(chunk)
        api.upload_file(
            path_or_fileobj=str(local),
            path_in_repo=dest_path_in_repo,
            repo_id=DEST_REPO,
            repo_type="model",
            commit_message=commit_message,
        )
    finally:
        # Runs on success and on failure alike; missing_ok covers the case
        # where the request failed before the file was ever created.
        local.unlink(missing_ok=True)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Run the three mirror steps in order: VACE-14B, the distill LoRA, then LaMa."""
    # Step 1: VACE-14B. It is the largest source, so it goes first while the
    # scratch disk is at its emptiest. Repo assets and .gitattributes are skipped.
    vace_skip = ["assets/*", ".gitattributes"]
    stream_repo(
        "Wan-AI/Wan2.1-VACE-14B-diffusers",
        dest_prefix="vace-14b",
        exclude_globs=vace_skip,
    )

    # Step 2: distill LoRA. Markdown docs, .gitattributes, i2v variants and
    # the rank-32 / rank-128 files are filtered out. Per the original author's
    # note, this pulls only the rank-64 t2v 4-step LoRA (matches the vace.py
    # 8-step plan).
    lora_skip = [
        "*.md",
        ".gitattributes",
        "*i2v*",
        "*rank32*",
        "*rank128*",
    ]
    stream_repo(
        "lightx2v/Wan2.1-Distill-Loras",
        dest_prefix="loras",
        exclude_globs=lora_skip,
    )

    # Step 3: LaMa weights, fetched from a GitHub release rather than the Hub.
    lama_url = "https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt"
    stream_url(
        url=lama_url,
        dest_path_in_repo="lama/big-lama.pt",
        commit_message="Mirror big-lama.pt from simple-lama-inpainting v0.1.0 GitHub release",
    )

    print("\nβœ… All mirrors complete.")


# Start the mirror job only when this file is executed as a script.
if __name__ == "__main__":
    main()