fe / scripts /delete-hf.py
GGSheng's picture
fix: 强制推送更新 backup.py 修复逻辑
80b5c51 verified
#!/usr/bin/env python3
"""Hugging Face 资源删除工具 — 支持删除 Space / Dataset / Model / Storage / 文件。
用法:
# 删除整个仓库
python scripts/delete-hf.py repo <repo_id> --type space
python scripts/delete-hf.py repo <repo_id> --type dataset
python scripts/delete-hf.py repo <repo_id> --type model
# 删除仓库中的文件(支持通配符)
python scripts/delete-hf.py files <repo_id> --pattern "backups/*"
# 删除 Space 持久存储
python scripts/delete-hf.py storage <space_id>
# Skip the confirmation prompt / pass a token (global options shown before the sub-command)
python scripts/delete-hf.py --yes repo my-space --type space
python scripts/delete-hf.py --token hf_xxx files my-backup --pattern "*.old"
"""
import argparse
import sys
import time
from huggingface_hub import HfApi
# Maximum number of glob patterns passed to a single delete_files call
# (cmd_delete_files slices its pattern list into chunks of this size).
_BATCH_SIZE = 100
def _confirm(prompt: str, auto_yes: bool) -> bool:
if auto_yes:
return True
ans = input(f"{prompt} (yes/NO): ").strip().lower()
return ans in ("yes", "y")
def cmd_delete_repo(api: HfApi, args: argparse.Namespace) -> None:
    """Delete a whole repository (Space / Dataset / Model) after confirmation.

    Args:
        api: Hub client whose ``delete_repo`` method performs the deletion.
        args: Parsed CLI arguments; reads ``repo_id``, ``type`` and ``yes``.

    Prints a cancellation notice and returns if the user declines.
    Exits the process with status 1 if the deletion raises.
    """
    target, kind = args.repo_id, args.type

    print(f"待删除仓库: {target} (type={kind})")
    print(" 此操作不可恢复!")

    confirmed = _confirm(f"确认删除仓库 '{target}'?", args.yes)
    if confirmed:
        try:
            api.delete_repo(repo_id=target, repo_type=kind)
            print(f"✓ 仓库 '{target}' 已删除")
        except Exception as err:
            # Top-level CLI boundary: report the failure and signal it via exit code.
            print(f"✗ 删除失败: {err}", file=sys.stderr)
            sys.exit(1)
    else:
        print("[INFO] 已取消")
def cmd_delete_files(api: HfApi, args: argparse.Namespace) -> None:
    """Delete the files in a repository that match the given glob patterns.

    The matching files are listed first so the user can review them. After
    confirmation the patterns are handed to ``api.delete_files`` in chunks of
    ``_BATCH_SIZE`` patterns, one commit per chunk.

    Args:
        api: Hub client used to list and delete files.
        args: Parsed CLI arguments; reads ``repo_id``, ``patterns``, ``type``
            and ``yes``.

    Returns without deleting if nothing matches or the user declines.
    Exits the process with status 1 if listing fails or any batch fails.
    """
    import fnmatch  # local import: only this sub-command needs it

    repo_id = args.repo_id
    patterns = args.patterns
    repo_type = args.type

    # List the repository first so the user can preview what will be removed.
    try:
        all_files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type)
    except Exception as e:
        print(f"[ERROR] 无法列出文件: {e}", file=sys.stderr)
        sys.exit(1)

    # The delete_files docs accept folder paths such as "dir/" in addition to
    # wildcards. Expand a trailing slash to "dir/*" for the preview so such a
    # pattern is not reported as "no match". The patterns sent to the Hub
    # below are left exactly as the user typed them.
    preview_patterns = [p + "*" if p.endswith("/") else p for p in patterns]
    matched = [
        path
        for path in all_files
        if any(fnmatch.fnmatch(path, pat) for pat in preview_patterns)
    ]

    if not matched:
        print("[INFO] 没有匹配的文件")
        return

    print(f"匹配到 {len(matched)} 个文件:")
    for path in sorted(matched):
        print(f" - {path}")

    if not _confirm(f"确认删除这 {len(matched)} 个文件?", args.yes):
        print("[INFO] 已取消")
        return

    # Delete in batches of patterns (not of files).
    total = len(patterns)
    total_batches = (total + _BATCH_SIZE - 1) // _BATCH_SIZE
    deleted_total = 0
    for batch_no, start in enumerate(range(0, total, _BATCH_SIZE), start=1):
        batch = patterns[start:start + _BATCH_SIZE]
        # Keep the try body to the API call so that only Hub failures are
        # reported as a failed batch.
        try:
            api.delete_files(
                repo_id=repo_id,
                repo_type=repo_type,
                delete_patterns=batch,
                commit_message=(
                    f"delete-hf: batch {batch_no}/{total_batches} "
                    f"({len(batch)} patterns)"
                ),
            )
        except Exception as e:
            print(f" ✗ 第 {batch_no}/{total_batches} 批失败: {e}",
                  file=sys.stderr)
            print(f" 已删除 {deleted_total}/{total} 个模式后中断")
            sys.exit(1)

        deleted_total += len(batch)
        print(f" ✓ 第 {batch_no}/{total_batches} 批: "
              f"删除 {len(batch)} 个通配符模式")
        if batch_no < total_batches:
            # Brief pause between consecutive commits.
            time.sleep(1)

    print(f"✓ 所有匹配文件已删除({len(matched)} 个文件,{total} 个模式)")
def cmd_delete_storage(api: HfApi, args: argparse.Namespace) -> None:
    """Delete the persistent storage of a Space after confirmation.

    Args:
        api: Hub client whose ``delete_space_storage`` method is called.
        args: Parsed CLI arguments; reads ``space_id`` and ``yes``.

    Prints a cancellation notice and returns if the user declines. On success,
    a truthy return value from the API call is printed as the current runtime
    state. Exits the process with status 1 if the call raises.
    """
    target = args.space_id

    print(f"待删除存储的 Space: {target}")
    print(" 此操作不可恢复!所有持久存储数据将被清除。")

    confirmed = _confirm(f"确认删除 '{target}' 的持久存储?", args.yes)
    if not confirmed:
        print("[INFO] 已取消")
        return

    try:
        runtime = api.delete_space_storage(repo_id=target)
        print(f"✓ Space '{target}' 的持久存储已删除")
        if runtime:
            print(f" 当前运行时状态: {runtime}")
    except Exception as err:
        print(f"✗ 删除失败: {err}", file=sys.stderr)
        # Add a hint when the error text says there is no persistent storage.
        storage_missing = "no persistent storage" in str(err).lower()
        if storage_missing:
            print(" [HINT] 该 Space 可能未启用持久存储", file=sys.stderr)
        sys.exit(1)
def _add_global_options(parser: argparse.ArgumentParser, *, inherit: bool) -> None:
    """Register ``--token`` and ``--yes/-y`` on *parser*.

    With ``inherit=True`` the defaults are suppressed, so a sub-parser that
    does not see the option leaves the value parsed by the top-level parser
    untouched instead of overwriting it with a default.
    """
    parser.add_argument(
        "--token",
        default=argparse.SUPPRESS if inherit else None,
        help="HF API token",
    )
    parser.add_argument(
        "--yes", "-y",
        action="store_true",
        default=argparse.SUPPRESS if inherit else False,
        help="跳过确认提示",
    )


def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser with the ``repo``, ``files`` and ``storage`` sub-commands."""
    parser = argparse.ArgumentParser(
        description="Hugging Face 资源删除工具"
    )
    _add_global_options(parser, inherit=False)
    sub = parser.add_subparsers(dest="command", required=True,
                                help="子命令")

    # --- repo ---
    p_repo = sub.add_parser("repo", help="删除整个仓库")
    p_repo.add_argument("repo_id", help="仓库 ID,如 user/space-name")
    p_repo.add_argument("--type", dest="type", required=True,
                        choices=["space", "dataset", "model"],
                        help="仓库类型")
    p_repo.set_defaults(func=cmd_delete_repo)

    # --- files ---
    p_files = sub.add_parser("files", help="批量删除仓库中的文件")
    p_files.add_argument("repo_id", help="仓库 ID")
    p_files.add_argument("--pattern", dest="patterns",
                         required=True, action="append",
                         help="通配符模式(可多次使用)")
    p_files.add_argument("--type", dest="type", default="dataset",
                         choices=["dataset", "model", "space"],
                         help="仓库类型(默认: dataset)")
    p_files.set_defaults(func=cmd_delete_files)

    # --- storage ---
    p_storage = sub.add_parser("storage", help="删除 Space 持久存储")
    p_storage.add_argument("space_id", help="Space ID,如 user/space-name")
    p_storage.set_defaults(func=cmd_delete_storage)

    # Accept --token / --yes after the sub-command as well, as the usage
    # examples in the module docstring show. Without this argparse rejects
    # them there as unrecognized arguments.
    for sub_parser in (p_repo, p_files, p_storage):
        _add_global_options(sub_parser, inherit=True)

    return parser


def main(argv=None) -> None:
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)
    # Create the API client; a ``None`` token is passed through unchanged.
    api = HfApi(token=args.token)
    # Dispatch to the handler bound by the chosen sub-parser.
    args.func(api, args)


if __name__ == "__main__":
    main()