linvest21's picture
download
raw
5.83 kB
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
from typing import Any
from n21.config import load_structured, write_json
from n21.settings import REPO_ROOT, SHFT_WORKSPACE_ROOT
from observability.audit_log import utc_now
def _run(command: list[str], *, cwd: Path) -> dict[str, object]:
proc = subprocess.run(command, cwd=cwd, text=True, capture_output=True, check=False)
return {
"command": command,
"cwd": str(cwd),
"returncode": proc.returncode,
"stdout": proc.stdout,
"stderr": proc.stderr,
}
def _hf_cli_authenticated() -> bool:
proc = subprocess.run(["hf", "auth", "whoami"], text=True, capture_output=True, check=False)
return proc.returncode == 0
def _model_card(config: dict[str, Any]) -> str:
b = config["bootstrap"]
return f"""---
base_model: {b["source_base_model_id"]}
library_name: peft
tags:
- finance
- fingpt
- linvest21
- lora
- shft
private: true
---
# {b["model_name"]}
Initial Linvest21 FinGPT bootstrap model.
## Source
Bootstrap adapter: `{b["source_adapter_repo_id"]}`
## Base Model
Requires access to `{b["source_base_model_id"]}`.
## Purpose
This is the starting Linvest21 financial LLM adapter for SHFT iteration.
## Version
`{b["version"]}`
## Governance
{b["governance_note"]}
Future iterations must include dataset manifests, eval evidence, approvals, and rollback metadata.
"""
def build_bootstrap_plan(config: dict[str, Any], *, live: bool) -> dict[str, object]:
    """Describe the steps needed to bootstrap the target Hugging Face repo.

    Nothing is executed here; the function only assembles a description of
    the work from ``config["bootstrap"]``.

    Args:
        config: Loaded configuration.  ``config["bootstrap"]`` must provide
            ``local_root``, ``source_local_dir``, ``target_local_dir``,
            ``model_name``, ``target_repo_id``, ``source_adapter_repo_id``,
            ``source_base_model_id``, ``credential_env_var`` and ``version``.
            ``target_repo_private`` is optional and defaults to ``True``.
        live: Recorded verbatim in the plan under ``"live"``.

    Returns:
        A dict holding a creation timestamp, the identifiers copied from the
        configuration, the resolved local paths as strings, the ordered
        ``commands`` list and human-readable ``notes``.  ``commands`` mixes
        real argument vectors with the symbolic steps ``copy_adapter_files``
        and ``write_model_card``, which ``bootstrap_hf_model`` carries out in
        Python rather than by spawning a process.

    Raises:
        KeyError: If a required bootstrap key is missing.
    """
    b = config["bootstrap"]
    local_root = REPO_ROOT / b["local_root"]
    source_dir = local_root / b["source_local_dir"]
    target_dir = local_root / b["target_local_dir"]
    # NOTE(review): confirm that the installed `hf repo create` accepts
    # `--public`; if it only knows `--private`, the public case needs a
    # different spelling (or no flag at all).
    repo_visibility = "--private" if b.get("target_repo_private", True) else "--public"
    return {
        "created_at": utc_now(),
        "live": live,
        "model_name": b["model_name"],
        "target_repo_id": b["target_repo_id"],
        "source_adapter_repo_id": b["source_adapter_repo_id"],
        "source_base_model_id": b["source_base_model_id"],
        "credential_env_var": b["credential_env_var"],
        "local_root": str(local_root),
        "source_dir": str(source_dir),
        "target_dir": str(target_dir),
        "commands": [
            ["git", "lfs", "install"],
            # bootstrap_hf_model picks this entry up by position (index 1).
            ["hf", "repo", "create", b["target_repo_id"], "--repo-type", "model", repo_visibility, "--exist-ok"],
            ["git", "clone", f"https://huggingface.co/{b['source_adapter_repo_id']}", str(source_dir)],
            ["git", "clone", f"https://huggingface.co/{b['target_repo_id']}", str(target_dir)],
            ["copy_adapter_files", str(source_dir), str(target_dir)],
            ["write_model_card", str(target_dir / "README.md")],
            ["git", "add", "."],
            ["git", "commit", "-m", f"Initialize Linvest21 FinGPT bootstrap {b['version']}"],
            ["git", "push"],
        ],
        "notes": [
            "Dry-run mode writes this plan only.",
            # The live path accepts either the configured environment variable
            # or an authenticated CLI session, so name both here.
            f"Live mode requires {b['credential_env_var']} in the environment or a prior `hf auth login`.",
            "The FinGPT adapter requires access to the base model listed in source_base_model_id.",
            "The target repo should be a Hugging Face model repo, not a Space.",
        ],
    }
def bootstrap_hf_model(config_path: Path, *, live: bool = False) -> dict[str, object]:
    """Plan and, when *live* is set, execute the Hugging Face bootstrap.

    The plan is always written to ``bootstrap_plan.json`` under the model's
    registry directory.  In live mode the function then creates the target
    repo, clones the source and target repos when their local directories do
    not exist yet, copies every top-level entry except ``.git`` from source
    to target, writes the model card to ``README.md`` and runs
    ``git add`` / ``git commit`` / ``git push`` in the target checkout.

    Args:
        config_path: Path of the structured configuration file to load.
        live: When False (the default) only the plan is written.

    Returns:
        A dict whose ``status`` is one of:

        * ``"planned"`` - dry run; also carries ``plan_path`` and ``plan``.
        * ``"blocked"`` - neither the configured credential environment
          variable nor an authenticated ``hf`` CLI session is available.
        * ``"failed"`` - a local checkout is still missing after cloning.
        * ``"completed"`` - add, commit and push all exited with 0.
        * ``"check_results"`` - at least one of those three steps did not.

        The ``failed``, ``completed`` and ``check_results`` outcomes are also
        written to ``bootstrap_result.json`` next to the plan.
    """
    config = load_structured(config_path)
    bootstrap_cfg = config["bootstrap"]
    plan = build_bootstrap_plan(config, live=live)

    registry_dir = SHFT_WORKSPACE_ROOT / "registry" / "models" / bootstrap_cfg["model_name"]
    registry_dir.mkdir(parents=True, exist_ok=True)
    write_json(registry_dir / "bootstrap_plan.json", plan)

    if not live:
        return {
            "status": "planned",
            "plan_path": str(registry_dir / "bootstrap_plan.json"),
            "plan": plan,
        }

    # The environment variable is checked first; the CLI is only queried when
    # the variable is unset or empty.
    token_env = bootstrap_cfg["credential_env_var"]
    has_credential = bool(os.environ.get(token_env)) or _hf_cli_authenticated()
    if not has_credential:
        return {
            "status": "blocked",
            "errors": [f"missing Hugging Face credential: set {token_env} or run `hf auth login`"],
            "plan": plan,
        }

    local_root = Path(plan["local_root"])
    source_dir = Path(plan["source_dir"])
    target_dir = Path(plan["target_dir"])
    local_root.mkdir(parents=True, exist_ok=True)

    # Entry 1 of the plan's command list is the `hf repo create` invocation.
    results: list[dict[str, object]] = [
        _run(["git", "lfs", "install"], cwd=local_root),
        _run(plan["commands"][1], cwd=local_root),
    ]

    # Clone source first, then target, skipping checkouts that already exist.
    checkouts = (
        ("source_adapter_repo_id", source_dir),
        ("target_repo_id", target_dir),
    )
    for repo_key, checkout in checkouts:
        if checkout.exists():
            continue
        clone_url = f"https://huggingface.co/{bootstrap_cfg[repo_key]}"
        results.append(_run(["git", "clone", clone_url, str(checkout)], cwd=local_root))

    if not (source_dir.exists() and target_dir.exists()):
        failure = {"status": "failed", "results": results, "plan": plan}
        write_json(registry_dir / "bootstrap_result.json", failure)
        return failure

    # Mirror the adapter files into the target checkout, leaving the source
    # repo's git metadata behind.
    for entry in source_dir.iterdir():
        if entry.name == ".git":
            continue
        destination = target_dir / entry.name
        if not entry.is_dir():
            shutil.copy2(entry, destination)
            continue
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(entry, destination)

    # Written after the copy so the generated card replaces any README.md
    # that came from the source repo.
    readme_path = target_dir / "README.md"
    readme_path.write_text(_model_card(config), encoding="utf-8")

    commit_message = f"Initialize Linvest21 FinGPT bootstrap {bootstrap_cfg['version']}"
    publish_commands = (
        ["git", "add", "."],
        ["git", "commit", "-m", commit_message],
        ["git", "push"],
    )
    publish_results = [_run(step, cwd=target_dir) for step in publish_commands]
    results.extend(publish_results)

    # Only the three publish steps decide the final status.
    published = all(step["returncode"] == 0 for step in publish_results)
    result = {
        "status": "completed" if published else "check_results",
        "results": results,
        "plan": plan,
    }
    write_json(registry_dir / "bootstrap_result.json", result)
    return result

Xet Storage Details

Size:
5.83 kB
·
Xet hash:
dec96b5469eb35d18ea040aea85f86f7b7ab43356ec0d95ae35d79fa35ad4dd8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.