#!/usr/bin/env python3
"""Hugging Face resource deletion tool.

Deletes Spaces, Datasets, Models, Space persistent storage, or individual
files inside a repository.

Usage:
    # Delete an entire repository
    python scripts/delete-hf.py repo REPO_ID --type space
    python scripts/delete-hf.py repo REPO_ID --type dataset
    python scripts/delete-hf.py repo REPO_ID --type model

    # Delete files inside a repository (wildcards supported)
    python scripts/delete-hf.py files REPO_ID --pattern "backups/*"

    # Delete a Space's persistent storage
    python scripts/delete-hf.py storage SPACE_ID

    # Skip the interactive confirmation / pass a token explicitly
    python scripts/delete-hf.py repo my-space --type space --yes
    python scripts/delete-hf.py files my-backup --pattern "*.old" --token hf_xxx

``--token`` and ``--yes`` are accepted both before and after the subcommand.
"""

import argparse
import fnmatch
import sys
import time
from typing import Optional, Sequence

try:
    from huggingface_hub import HfApi
except ModuleNotFoundError:
    print("Error: 'huggingface_hub' module not found.")
    print()
    print("Install it with:")
    print(" uv add 'huggingface_hub[cli]'")
    print()
    print("Or with pip:")
    print(" pip install 'huggingface_hub[cli]'")
    sys.exit(1)

# Maximum number of wildcard patterns sent in one delete_files() call.
_BATCH_SIZE = 100


def _confirm(prompt: str, auto_yes: bool) -> bool:
    """Ask the user to confirm a destructive action.

    Args:
        prompt: Question shown to the user (the "(yes/NO)" suffix is appended).
        auto_yes: When true, skip the prompt and treat the action as confirmed.

    Returns:
        True if confirmed ("yes" or "y", case-insensitive), otherwise False.
    """
    if auto_yes:
        return True
    answer = input(f"{prompt} (yes/NO): ").strip().lower()
    return answer in ("yes", "y")


def cmd_delete_repo(api: HfApi, args: argparse.Namespace) -> None:
    """Delete an entire repository (Space / Dataset / Model).

    Reads ``args.repo_id``, ``args.type`` and ``args.yes``. Exits the process
    with status 1 if the deletion call raises.
    """
    repo_id = args.repo_id
    repo_type = args.type

    print(f"待删除仓库: {repo_id} (type={repo_type})")
    print(" 此操作不可恢复!")

    if not _confirm(f"确认删除仓库 '{repo_id}'?", args.yes):
        print("[INFO] 已取消")
        return

    try:
        api.delete_repo(repo_id=repo_id, repo_type=repo_type)
        print(f"✓ 仓库 '{repo_id}' 已删除")
    except Exception as e:
        print(f"✗ 删除失败: {e}", file=sys.stderr)
        sys.exit(1)


def cmd_delete_files(api: HfApi, args: argparse.Namespace) -> None:
    """Delete files in a repository that match one or more wildcard patterns.

    Reads ``args.repo_id``, ``args.patterns``, ``args.type`` and ``args.yes``.
    The matching files are listed for preview, the user is asked to confirm,
    and the patterns are then passed to ``api.delete_files`` in batches of at
    most ``_BATCH_SIZE`` patterns. Exits with status 1 if listing fails or a
    batch raises.
    """
    repo_id = args.repo_id
    patterns = args.patterns
    repo_type = args.type

    # List matching files first so the user can preview what will be removed.
    try:
        all_files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type)
    except Exception as e:
        print(f"[ERROR] 无法列出文件: {e}", file=sys.stderr)
        sys.exit(1)

    matched = [
        path
        for path in all_files
        if any(fnmatch.fnmatch(path, pat) for pat in patterns)
    ]

    if not matched:
        print("[INFO] 没有匹配的文件")
        return

    print(f"匹配到 {len(matched)} 个文件:")
    for path in sorted(matched):
        print(f" - {path}")

    if not _confirm(f"确认删除这 {len(matched)} 个文件?", args.yes):
        print("[INFO] 已取消")
        return

    # NOTE(review): deletion is driven by the patterns, not by the previewed
    # file list, so the files actually removed are whatever delete_files()
    # matches at call time -- confirm this is acceptable if the repository can
    # change between the preview and the deletion.
    total = len(patterns)
    total_batches = (total + _BATCH_SIZE - 1) // _BATCH_SIZE  # ceiling division
    deleted_total = 0

    for batch_idx in range(total_batches):
        start = batch_idx * _BATCH_SIZE
        batch = patterns[start:start + _BATCH_SIZE]
        try:
            api.delete_files(
                repo_id=repo_id,
                repo_type=repo_type,
                delete_patterns=batch,
                commit_message=(
                    f"delete-hf: batch {batch_idx + 1}/{total_batches} "
                    f"({len(batch)} patterns)"
                ),
            )
            deleted_total += len(batch)
            print(f" ✓ 第 {batch_idx + 1}/{total_batches} 批: "
                  f"删除 {len(batch)} 个通配符模式")
            # Pause between batches, but not after the last one.
            if batch_idx + 1 < total_batches:
                time.sleep(1)
        except Exception as e:
            print(f" ✗ 第 {batch_idx + 1}/{total_batches} 批失败: {e}",
                  file=sys.stderr)
            print(f" 已删除 {deleted_total}/{total} 个模式后中断")
            sys.exit(1)

    print(f"✓ 所有匹配文件已删除({len(matched)} 个文件,{total} 个模式)")


def cmd_delete_storage(api: HfApi, args: argparse.Namespace) -> None:
    """Delete the persistent storage of a Space.

    Reads ``args.space_id`` and ``args.yes``. Exits with status 1 if the
    deletion call raises; a hint is printed when the error text mentions
    "no persistent storage".
    """
    space_id = args.space_id

    print(f"待删除存储的 Space: {space_id}")
    print(" 此操作不可恢复!所有持久存储数据将被清除。")

    if not _confirm(f"确认删除 '{space_id}' 的持久存储?", args.yes):
        print("[INFO] 已取消")
        return

    try:
        result = api.delete_space_storage(repo_id=space_id)
        print(f"✓ Space '{space_id}' 的持久存储已删除")
        if result:
            print(f" 当前运行时状态: {result}")
    except Exception as e:
        print(f"✗ 删除失败: {e}", file=sys.stderr)
        if "no persistent storage" in str(e).lower():
            print(" [HINT] 该 Space 可能未启用持久存储", file=sys.stderr)
        sys.exit(1)


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with the repo / files / storage commands."""
    parser = argparse.ArgumentParser(
        description="Hugging Face 资源删除工具"
    )
    parser.add_argument("--token", default=None, help="HF API token")
    parser.add_argument("--yes", "-y", action="store_true",
                        help="跳过确认提示")

    # argparse hands every argument after the subcommand name to the
    # sub-parser, so options defined only on the top-level parser are rejected
    # when written after the subcommand (as the usage examples do). Repeat them
    # on each sub-parser; SUPPRESS keeps an omitted flag from overwriting a
    # value that was given before the subcommand.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("--token", default=argparse.SUPPRESS,
                        help="HF API token")
    common.add_argument("--yes", "-y", action="store_true",
                        default=argparse.SUPPRESS, help="跳过确认提示")

    sub = parser.add_subparsers(dest="command", required=True, help="子命令")

    # --- repo ---
    p_repo = sub.add_parser("repo", parents=[common], help="删除整个仓库")
    p_repo.add_argument("repo_id", help="仓库 ID,如 user/space-name")
    p_repo.add_argument("--type", dest="type", required=True,
                        choices=["space", "dataset", "model"],
                        help="仓库类型")
    p_repo.set_defaults(func=cmd_delete_repo)

    # --- files ---
    p_files = sub.add_parser("files", parents=[common],
                             help="批量删除仓库中的文件")
    p_files.add_argument("repo_id", help="仓库 ID")
    p_files.add_argument("--pattern", dest="patterns", required=True,
                         action="append",
                         help="通配符模式(可多次使用)")
    p_files.add_argument("--type", dest="type", default="dataset",
                         choices=["dataset", "model", "space"],
                         help="仓库类型(默认: dataset)")
    p_files.set_defaults(func=cmd_delete_files)

    # --- storage ---
    p_storage = sub.add_parser("storage", parents=[common],
                               help="删除 Space 持久存储")
    p_storage.add_argument("space_id", help="Space ID,如 user/space-name")
    p_storage.set_defaults(func=cmd_delete_storage)

    return parser


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)

    # The token is None unless --token was given on the command line.
    api = HfApi(token=args.token)

    args.func(api, args)


if __name__ == "__main__":
    main()