Buckets:
| from __future__ import annotations | |
| import os | |
| import shutil | |
| import subprocess | |
| from pathlib import Path | |
| from typing import Any | |
| from n21.config import load_structured, write_json | |
| from n21.settings import REPO_ROOT, SHFT_WORKSPACE_ROOT | |
| from observability.audit_log import utc_now | |
def _run(command: list[str], *, cwd: Path) -> dict[str, object]:
    """Run *command* inside *cwd* and return a record of what happened.

    The command is executed without a shell and never raises on a non-zero
    exit status; the caller inspects ``returncode`` instead.

    Args:
        command: Executable and arguments, passed to ``subprocess.run`` as a list.
        cwd: Working directory for the child process.

    Returns:
        A dict with the keys ``command``, ``cwd`` (as a string),
        ``returncode``, ``stdout`` and ``stderr``. When the process cannot be
        started at all, ``stdout`` is empty, ``stderr`` holds the OS error
        text, and ``returncode`` is 127 (executable or working directory not
        found) or 126 (any other launch failure), mirroring the usual shell
        exit codes.
    """
    try:
        proc = subprocess.run(command, cwd=cwd, text=True, capture_output=True, check=False)
    except OSError as exc:
        # A missing binary (e.g. git or hf not installed) must show up as a
        # failed step in the result list rather than abort the whole run.
        returncode = 127 if isinstance(exc, FileNotFoundError) else 126
        stdout, stderr = "", str(exc)
    else:
        returncode, stdout, stderr = proc.returncode, proc.stdout, proc.stderr
    return {
        "command": command,
        "cwd": str(cwd),
        "returncode": returncode,
        "stdout": stdout,
        "stderr": stderr,
    }
def _hf_cli_authenticated() -> bool:
    """Return True when ``hf auth whoami`` exits successfully.

    Returns:
        True if the ``hf`` command ran and exited with status 0; False if it
        exited non-zero or could not be started at all.
    """
    try:
        proc = subprocess.run(["hf", "auth", "whoami"], text=True, capture_output=True, check=False)
    except OSError:
        # No usable `hf` executable: report "not authenticated" so the caller
        # can return its normal missing-credential answer instead of crashing.
        return False
    return proc.returncode == 0
def _model_card(config: dict[str, Any]) -> str:
    """Render the README.md model card for the bootstrap model.

    Args:
        config: Structured config whose ``"bootstrap"`` mapping provides
            ``model_name``, ``source_base_model_id``,
            ``source_adapter_repo_id``, ``version`` and ``governance_note``.
            The optional ``target_repo_private`` flag (default True) drives
            the ``private:`` front-matter entry.

    Returns:
        The card text: a YAML front-matter block followed by Markdown sections.

    Raises:
        KeyError: If a required bootstrap key is missing.
    """
    b = config["bootstrap"]
    # Keep the card in step with the visibility setting used for the repo
    # itself, instead of always claiming the model is private.
    private_flag = "true" if b.get("target_repo_private", True) else "false"
    return f"""---
base_model: {b["source_base_model_id"]}
library_name: peft
tags:
- finance
- fingpt
- linvest21
- lora
- shft
private: {private_flag}
---
# {b["model_name"]}
Initial Linvest21 FinGPT bootstrap model.
## Source
Bootstrap adapter: `{b["source_adapter_repo_id"]}`
## Base Model
Requires access to `{b["source_base_model_id"]}`.
## Purpose
This is the starting Linvest21 financial LLM adapter for SHFT iteration.
## Version
`{b["version"]}`
## Governance
{b["governance_note"]}
Future iterations must include dataset manifests, eval evidence, approvals, and rollback metadata.
"""
def build_bootstrap_plan(config: dict[str, Any], *, live: bool) -> dict[str, object]:
    """Describe, without executing anything, the steps of a model bootstrap.

    Args:
        config: Structured config whose ``"bootstrap"`` mapping supplies the
            repo ids, local directory names, model name, version and the name
            of the credential environment variable.
        live: Recorded verbatim in the plan; it does not change the steps.

    Returns:
        A dict holding a creation timestamp, the identifiers copied from the
        config, the resolved local paths (as strings), the ordered
        ``commands`` list and human-readable ``notes``. The entries
        ``copy_adapter_files`` and ``write_model_card`` in ``commands`` are
        symbolic steps, not executables.

    Raises:
        KeyError: If a required bootstrap key is missing.
    """
    b = config["bootstrap"]
    local_root = REPO_ROOT / b["local_root"]
    source_dir = local_root / b["source_local_dir"]
    target_dir = local_root / b["target_local_dir"]
    # Repos are private unless the config explicitly opts out.
    repo_visibility = "--private" if b.get("target_repo_private", True) else "--public"
    credential_env_var = b["credential_env_var"]
    return {
        "created_at": utc_now(),
        "live": live,
        "model_name": b["model_name"],
        "target_repo_id": b["target_repo_id"],
        "source_adapter_repo_id": b["source_adapter_repo_id"],
        "source_base_model_id": b["source_base_model_id"],
        "credential_env_var": credential_env_var,
        "local_root": str(local_root),
        "source_dir": str(source_dir),
        "target_dir": str(target_dir),
        "commands": [
            ["git", "lfs", "install"],
            ["hf", "repo", "create", b["target_repo_id"], "--repo-type", "model", repo_visibility, "--exist-ok"],
            ["git", "clone", f"https://huggingface.co/{b['source_adapter_repo_id']}", str(source_dir)],
            ["git", "clone", f"https://huggingface.co/{b['target_repo_id']}", str(target_dir)],
            ["copy_adapter_files", str(source_dir), str(target_dir)],
            ["write_model_card", str(target_dir / "README.md")],
            ["git", "add", "."],
            ["git", "commit", "-m", f"Initialize Linvest21 FinGPT bootstrap {b['version']}"],
            ["git", "push"],
        ],
        "notes": [
            "Dry-run mode writes this plan only.",
            # Name the configured variable rather than a fixed one, and mention
            # the CLI login that live mode accepts as an alternative.
            f"Live mode requires {credential_env_var} in the environment or a prior `hf auth login`.",
            "The FinGPT adapter requires access to the base model listed in source_base_model_id.",
            "The target repo should be a Hugging Face model repo, not a Space.",
        ],
    }
def bootstrap_hf_model(config_path: Path, *, live: bool = False) -> dict[str, object]:
    """Plan and, when *live* is set, carry out the Hugging Face model bootstrap.

    The plan is always written to ``bootstrap_plan.json`` under the model's
    registry directory. In live mode the function then creates the target
    repo, clones the source adapter and target repos if their local
    directories are absent, copies the adapter files across, writes the model
    card and commits and pushes the result.

    Args:
        config_path: Path of the structured config file to load.
        live: When False (the default) only the plan is written.

    Returns:
        A dict whose ``status`` is one of:
        ``"planned"`` (dry run; includes ``plan_path``),
        ``"blocked"`` (no credential available; includes ``errors``),
        ``"failed"`` (a clone directory is missing after the clone step),
        ``"completed"`` (git add, commit and push all exited 0) or
        ``"check_results"`` (at least one of those three did not).
        The failed, completed and check_results outcomes are also written to
        ``bootstrap_result.json``.
    """
    config = load_structured(config_path)
    settings = config["bootstrap"]
    plan = build_bootstrap_plan(config, live=live)

    registry_dir = SHFT_WORKSPACE_ROOT / "registry" / "models" / settings["model_name"]
    registry_dir.mkdir(parents=True, exist_ok=True)
    plan_file = registry_dir / "bootstrap_plan.json"
    write_json(plan_file, plan)
    if not live:
        return {"status": "planned", "plan_path": str(plan_file), "plan": plan}

    # Either the configured environment variable or a logged-in hf CLI will do;
    # the CLI is only probed when the variable is unset or empty.
    token_env = settings["credential_env_var"]
    if not (os.environ.get(token_env) or _hf_cli_authenticated()):
        return {"status": "blocked", "errors": [f"missing Hugging Face credential: set {token_env} or run `hf auth login`"], "plan": plan}

    workspace = Path(plan["local_root"])
    adapter_checkout = Path(plan["source_dir"])
    target_checkout = Path(plan["target_dir"])
    workspace.mkdir(parents=True, exist_ok=True)

    # Step 1: git-lfs setup, then the repo-create command taken from the plan.
    results: list[dict[str, object]] = [
        _run(["git", "lfs", "install"], cwd=workspace),
        _run(plan["commands"][1], cwd=workspace),
    ]

    # Step 2: clone whichever checkout is not already on disk (source first).
    checkouts = (
        ("source_adapter_repo_id", adapter_checkout),
        ("target_repo_id", target_checkout),
    )
    for repo_key, checkout in checkouts:
        if not checkout.exists():
            results.append(_run(["git", "clone", f"https://huggingface.co/{settings[repo_key]}", str(checkout)], cwd=workspace))

    result_file = registry_dir / "bootstrap_result.json"
    if not (adapter_checkout.exists() and target_checkout.exists()):
        failure = {"status": "failed", "results": results, "plan": plan}
        write_json(result_file, failure)
        return failure

    # Step 3: mirror every top-level entry except the source's git metadata.
    for entry in adapter_checkout.iterdir():
        if entry.name == ".git":
            continue
        destination = target_checkout / entry.name
        if not entry.is_dir():
            shutil.copy2(entry, destination)
            continue
        if destination.exists():
            # copytree needs a fresh destination, so drop the stale copy first.
            shutil.rmtree(destination)
        shutil.copytree(entry, destination)

    (target_checkout / "README.md").write_text(_model_card(config), encoding="utf-8")

    # Step 4: publish. Only these three commands decide the final status.
    publish_steps = (
        ["git", "add", "."],
        ["git", "commit", "-m", f"Initialize Linvest21 FinGPT bootstrap {settings['version']}"],
        ["git", "push"],
    )
    for step in publish_steps:
        results.append(_run(step, cwd=target_checkout))

    published = all(record["returncode"] == 0 for record in results[-3:])
    outcome = {"status": "completed" if published else "check_results", "results": results, "plan": plan}
    write_json(result_file, outcome)
    return outcome
Xet Storage Details
- Size:
- 5.83 kB
- Xet hash:
- dec96b5469eb35d18ea040aea85f86f7b7ab43356ec0d95ae35d79fa35ad4dd8
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.