| |
| """Hugging Face 资源删除工具 — 支持删除 Space / Dataset / Model / Storage / 文件。 |
| |
| 用法: |
| # 删除整个仓库 |
| python scripts/delete-hf.py repo <repo_id> --type space |
| python scripts/delete-hf.py repo <repo_id> --type dataset |
| python scripts/delete-hf.py repo <repo_id> --type model |
| |
| # 删除仓库中的文件(支持通配符) |
| python scripts/delete-hf.py files <repo_id> --pattern "backups/*" |
| |
| # 删除 Space 持久存储 |
| python scripts/delete-hf.py storage <space_id> |
| |
| # 交互确认 + token 参数 |
| python scripts/delete-hf.py repo my-space --type space --yes |
| python scripts/delete-hf.py files my-backup --pattern "*.old" --token hf_xxx |
| """ |
|
|
| import argparse |
| import sys |
| import time |
|
|
| try: |
| from huggingface_hub import HfApi |
| except ModuleNotFoundError: |
| print("Error: 'huggingface_hub' module not found.") |
| print() |
| print("Install it with:") |
| print(" uv add 'huggingface_hub[cli]'") |
| print() |
| print("Or with pip:") |
| print(" pip install 'huggingface_hub[cli]'") |
| sys.exit(1) |
|
|
| _BATCH_SIZE = 100 |
|
|
|
|
| def _confirm(prompt: str, auto_yes: bool) -> bool: |
| if auto_yes: |
| return True |
| ans = input(f"{prompt} (yes/NO): ").strip().lower() |
| return ans in ("yes", "y") |
|
|
|
|
def cmd_delete_repo(api: HfApi, args: argparse.Namespace) -> None:
    """Delete a whole repository (Space / Dataset / Model) after confirmation.

    Reads ``repo_id``, ``type`` and ``yes`` from ``args``. Prints a
    cancellation notice and returns when the user declines; exits with
    status 1 when the deletion raises.
    """
    target, kind = args.repo_id, args.type

    print(f"待删除仓库: {target} (type={kind})")
    print(" 此操作不可恢复!")

    confirmed = _confirm(f"确认删除仓库 '{target}'?", args.yes)
    if confirmed:
        try:
            api.delete_repo(repo_id=target, repo_type=kind)
            print(f"✓ 仓库 '{target}' 已删除")
        except Exception as exc:
            print(f"✗ 删除失败: {exc}", file=sys.stderr)
            sys.exit(1)
    else:
        print("[INFO] 已取消")
|
|
|
|
def cmd_delete_files(api: HfApi, args: argparse.Namespace) -> None:
    """Delete the files of a repository that match one or more glob patterns.

    The repository is listed first so the matching files can be shown for
    confirmation; the patterns are then passed to ``api.delete_files`` in
    batches of ``_BATCH_SIZE`` patterns, one commit per batch.

    Args:
        api: Hub client used to list and delete files.
        args: Parsed CLI arguments; ``repo_id``, ``patterns``, ``type`` and
            ``yes`` are read.

    Exits with status 1 when the listing fails or a deletion batch fails.
    Returns without deleting when nothing matches or the user declines.
    """
    import fnmatch

    repo_id = args.repo_id
    patterns = args.patterns
    repo_type = args.type

    try:
        all_files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type)
    except Exception as e:
        print(f"[ERROR] 无法列出文件: {e}", file=sys.stderr)
        sys.exit(1)

    # ``delete_files`` also accepts folder patterns such as "backups/".
    # Plain fnmatch never matches a file against a pattern ending in "/",
    # so expand those to "backups/*" for the preview; otherwise the command
    # would stop at "no matching files" for an input the API supports.
    preview_patterns = [
        pat + "*" if pat.endswith("/") else pat for pat in patterns
    ]
    matched = [
        f for f in all_files
        if any(fnmatch.fnmatch(f, pat) for pat in preview_patterns)
    ]

    if not matched:
        print("[INFO] 没有匹配的文件")
        return

    print(f"匹配到 {len(matched)} 个文件:")
    for f in sorted(matched):
        print(f" - {f}")

    if not _confirm(f"确认删除这 {len(matched)} 个文件?", args.yes):
        print("[INFO] 已取消")
        return

    # NOTE(review): the patterns, not the previewed file list, are what is
    # sent to the API, so the files actually removed are whatever matches at
    # call time — confirm this is the intended contract.
    total = len(patterns)
    total_batches = (total + _BATCH_SIZE - 1) // _BATCH_SIZE
    deleted_total = 0

    for batch_idx, start in enumerate(range(0, total, _BATCH_SIZE)):
        batch = patterns[start:start + _BATCH_SIZE]

        try:
            api.delete_files(
                repo_id=repo_id,
                repo_type=repo_type,
                delete_patterns=batch,
                commit_message=(
                    f"delete-hf: batch {batch_idx + 1}/{total_batches} "
                    f"({len(batch)} patterns)"
                ),
            )
        except Exception as e:
            print(f" ✗ 第 {batch_idx + 1}/{total_batches} 批失败: {e}",
                  file=sys.stderr)
            print(f" 已删除 {deleted_total}/{total} 个模式后中断")
            sys.exit(1)
        else:
            deleted_total += len(batch)
            print(f" ✓ 第 {batch_idx + 1}/{total_batches} 批: "
                  f"删除 {len(batch)} 个通配符模式")
            # Short pause between commits; skipped after the last batch.
            if batch_idx + 1 < total_batches:
                time.sleep(1)

    print(f"✓ 所有匹配文件已删除({len(matched)} 个文件,{total} 个模式)")
|
|
|
|
def cmd_delete_storage(api: HfApi, args: argparse.Namespace) -> None:
    """Delete the persistent storage of a Space after confirmation.

    Reads ``space_id`` and ``yes`` from ``args``. Prints a cancellation
    notice and returns when the user declines. On failure the error is
    written to stderr (with a hint when the message mentions
    "no persistent storage") and the process exits with status 1.
    """
    target = args.space_id

    print(f"待删除存储的 Space: {target}")
    print(" 此操作不可恢复!所有持久存储数据将被清除。")

    confirmed = _confirm(f"确认删除 '{target}' 的持久存储?", args.yes)
    if confirmed:
        try:
            runtime = api.delete_space_storage(repo_id=target)
            print(f"✓ Space '{target}' 的持久存储已删除")
            if runtime:
                print(f" 当前运行时状态: {runtime}")
        except Exception as exc:
            print(f"✗ 删除失败: {exc}", file=sys.stderr)
            reason = str(exc).lower()
            if "no persistent storage" in reason:
                print(" [HINT] 该 Space 可能未启用持久存储", file=sys.stderr)
            sys.exit(1)
    else:
        print("[INFO] 已取消")
|
|
|
|
def main():
    """Parse the command line and dispatch to the selected sub-command.

    Sub-commands: ``repo`` (delete a repository), ``files`` (delete files
    matching patterns) and ``storage`` (delete a Space's persistent storage).

    ``--token`` and ``--yes``/``-y`` are accepted both before and after the
    sub-command name, e.g. ``--yes repo x --type space`` and
    ``repo x --type space --yes``.
    """
    parser = argparse.ArgumentParser(
        description="Hugging Face 资源删除工具"
    )
    parser.add_argument("--token", default=None, help="HF API token")
    parser.add_argument("--yes", "-y", action="store_true",
                        help="跳过确认提示")

    # The same global options, attached to every sub-parser so they may also
    # follow the sub-command. SUPPRESS leaves the attribute unset when the
    # option is omitted there, so a value given before the sub-command is not
    # overwritten by a sub-parser default.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("--token", default=argparse.SUPPRESS,
                        help="HF API token")
    common.add_argument("--yes", "-y", action="store_true",
                        default=argparse.SUPPRESS,
                        help="跳过确认提示")

    sub = parser.add_subparsers(dest="command", required=True,
                                help="子命令")

    # repo: delete a whole repository
    p_repo = sub.add_parser("repo", parents=[common], help="删除整个仓库")
    p_repo.add_argument("repo_id", help="仓库 ID,如 user/space-name")
    p_repo.add_argument("--type", dest="type", required=True,
                        choices=["space", "dataset", "model"],
                        help="仓库类型")
    p_repo.set_defaults(func=cmd_delete_repo)

    # files: delete files matching one or more patterns
    p_files = sub.add_parser("files", parents=[common],
                             help="批量删除仓库中的文件")
    p_files.add_argument("repo_id", help="仓库 ID")
    p_files.add_argument("--pattern", dest="patterns",
                         required=True, action="append",
                         help="通配符模式(可多次使用)")
    p_files.add_argument("--type", dest="type", default="dataset",
                         choices=["dataset", "model", "space"],
                         help="仓库类型(默认: dataset)")
    p_files.set_defaults(func=cmd_delete_files)

    # storage: delete a Space's persistent storage
    p_storage = sub.add_parser("storage", parents=[common],
                               help="删除 Space 持久存储")
    p_storage.add_argument("space_id", help="Space ID,如 user/space-name")
    p_storage.set_defaults(func=cmd_delete_storage)

    args = parser.parse_args()

    api = HfApi(token=args.token)

    # Each sub-parser stored its handler in ``func`` via set_defaults.
    args.func(api, args)
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|