Buckets:

linvest21
/

shft-artifacts

Files

xet

linvest21/shft-artifacts / code /self_healing_finetuning /release_packaging /release.py

linvest21

2 days ago

download

raw

47.9 kB

	from __future__ import annotations

	import hashlib
	import json
	from pathlib import Path
	import shutil
	import textwrap
	from typing import Any
	import zipfile

	from n21.config import write_json
	from n21.settings import IMPLEMENTATION_PRODUCTS_ROOT, REPO_ROOT, SHFT_WORKSPACE_ROOT
	from observability.audit_log import utc_now


	# Base model identifier used as the default ``base_model_id`` of export_release().
	DEFAULT_BASE_MODEL = "meta-llama/Meta-Llama-3-8B"
	# Default run directory name (under SHFT_WORKSPACE_ROOT / "runs") the adapter is exported from.
	DEFAULT_SOURCE_RUN_ID = "run_step19_repair_train_001"
	# Role names that infer_release_identity() looks for as a trailing "_<role>" of a release id.
	# NOTE: "portfolio_manager" is a suffix of "client_portfolio_manager"; listing the longer
	# name first keeps in-order suffix matching unambiguous.
	ROLE_NAMES = [
	"chief_investment_officer",
	"client_portfolio_manager",
	"performance_manager",
	"portfolio_manager",
	"researcher",
	"risk_manager",
	]
	def product_dir_for(release_id: str) -> Path:
	    """Return the directory for ``release_id`` under ``IMPLEMENTATION_PRODUCTS_ROOT``.

	    The path is only computed; nothing is created or checked on disk.
	    """
	    products_root = IMPLEMENTATION_PRODUCTS_ROOT
	    return products_root / release_id


	def product_zip_for(release_id: str) -> Path:
	    """Return the path of the ``<release_id>.zip`` archive under ``IMPLEMENTATION_PRODUCTS_ROOT``.

	    The path is only computed; nothing is created or checked on disk.
	    """
	    archive_name = f"{release_id}.zip"
	    return IMPLEMENTATION_PRODUCTS_ROOT / archive_name


	def find_release_dir(release_id: str) -> tuple[Path, str]:
	    """Return ``(release directory, location label)`` for ``release_id``.

	    The directory always comes from product_dir_for() and the label is always
	    the literal ``"implementation_products"``; existence is not checked.
	    """
	    location_label = "implementation_products"
	    return product_dir_for(release_id), location_label


	def sha256_file(path: Path) -> str:
	    """Return the hexadecimal SHA-256 digest of the file at ``path``.

	    The file is read in 1 MiB pieces so large model files are never held in
	    memory as a whole.
	    """
	    piece_size = 1024 * 1024
	    hasher = hashlib.sha256()
	    with path.open("rb") as stream:
	        while True:
	            piece = stream.read(piece_size)
	            if piece == b"":
	                break
	            hasher.update(piece)
	    return hasher.hexdigest()


	def copy_tree(src: Path, dst: Path) -> None:
	    """Replace ``dst`` with a fresh recursive copy of ``src``.

	    Raises:
	        FileNotFoundError: if ``src`` does not exist.
	    """
	    source_present = src.exists()
	    if not source_present:
	        raise FileNotFoundError(f"required release source does not exist: {src}")
	    # Remove any previous copy first so stale files never survive a re-export.
	    stale_copy_present = dst.exists()
	    if stale_copy_present:
	        shutil.rmtree(dst)
	    shutil.copytree(src, dst)


	def copy_if_exists(src: Path, dst: Path) -> bool:
	    """Copy ``src`` to ``dst`` when ``src`` exists and report whether a copy was made.

	    Missing parent directories of ``dst`` are created. Returns ``True`` after a
	    copy and ``False`` (touching nothing) when ``src`` is absent.
	    """
	    if src.exists():
	        target_parent = dst.parent
	        target_parent.mkdir(parents=True, exist_ok=True)
	        shutil.copy2(src, dst)
	        return True
	    return False


	def collect_hashes(root: Path) -> dict[str, str]:
	    """Map every file below ``root`` to its SHA-256 hex digest.

	    Keys are paths relative to ``root`` with backslashes replaced by forward
	    slashes; entries are inserted in sorted path order. Directories are skipped.
	    """
	    return {
	        str(entry.relative_to(root)).replace("\\", "/"): sha256_file(entry)
	        for entry in sorted(root.rglob("*"))
	        if entry.is_file()
	    }


	def write_text(path: Path, content: str) -> None:
	    """Write ``content`` to ``path`` as UTF-8 with LF line endings.

	    Common leading indentation and leading whitespace are stripped first, so
	    callers can pass indented triple-quoted templates. Parent directories are
	    created as needed.
	    """
	    path.parent.mkdir(parents=True, exist_ok=True)
	    normalized = textwrap.dedent(content).lstrip()
	    # newline="\n" disables platform newline translation.
	    with path.open("w", encoding="utf-8", newline="\n") as sink:
	        sink.write(normalized)


	def infer_release_identity(
	    release_id: str,
	    *,
	    model_id: str | None = None,
	    asset_class: str | None = None,
	    role: str | None = None,
	) -> dict[str, str | None]:
	    """Derive the runtime identity (model id, asset class, role) of a release.

	    Explicit ``asset_class`` / ``role`` arguments always win. When either is
	    missing and ``release_id`` looks like
	    ``linvest21_fingpt_<asset_class>_<role>[_v<major>_<build>]``, the missing
	    parts are parsed from the id.

	    Args:
	        release_id: Release identifier, also the fallback model id.
	        model_id: Optional explicit runtime model id.
	        asset_class: Optional explicit asset class.
	        role: Optional explicit role name.

	    Returns:
	        A dict with keys ``"model_id"``, ``"asset_class"`` and ``"role"``; the
	        last two are ``None`` when neither given nor parseable.
	    """
	    import re  # local import: the module's top-level import block is left untouched

	    parsed_asset = asset_class
	    parsed_role = role
	    prefix = "linvest21_fingpt_"
	    needs_parsing = not parsed_asset or not parsed_role
	    if needs_parsing and release_id.lower().startswith(prefix):
	        # Strip any trailing version tag, not only _v1_000.._v1_002.
	        stem = re.sub(r"_v[0-9]+_[0-9]+\Z", "", release_id[len(prefix):], count=1, flags=re.IGNORECASE)
	        lowered_stem = stem.lower()
	        # Longest role first: "_portfolio_manager" is a suffix of
	        # "_client_portfolio_manager", so the result must not depend on list order.
	        for candidate_role in sorted(ROLE_NAMES, key=len, reverse=True):
	            marker = f"_{candidate_role}"
	            # Case-insensitive, like the prefix and version checks above.
	            if lowered_stem.endswith(marker):
	                parsed_asset = parsed_asset or stem[: -len(marker)]
	                parsed_role = parsed_role or candidate_role
	                break
	    return {
	        "model_id": model_id or release_id,
	        "asset_class": parsed_asset,
	        "role": parsed_role,
	    }


	def role_title(value: str | None) -> str:
	    """Turn a snake_case role name into a title-cased label.

	    ``None`` and the empty string yield the generic
	    ``"Financial Analysis Assistant"`` label.
	    """
	    if value:
	        words = value.split("_")
	        return " ".join(words).title()
	    return "Financial Analysis Assistant"


	def write_runtime_files(
	    release_dir: Path,
	    *,
	    release_id: str,
	    model_id: str,
	    asset_class: str | None,
	    role: str | None,
	    base_model_id: str,
	) -> None:
	    """Generate the portable runtime and tooling files of a release bundle.

	    Writes under ``release_dir / "runtime"``: ``chat_config.json``, the CPU/GPU
	    requirements files, ``chat_console.py``, ``serve_api.py``,
	    ``self_test_api_contract.py`` and their Windows ``.bat`` launchers.
	    Writes under ``release_dir / "tools"``: ``merge_hf_model.py``,
	    ``run_merge_hf_model.bat`` and ``EXPORT_MODES.md``.

	    Every template goes through write_text(), which dedents it, so the
	    indentation of the templates inside this function is cosmetic; only the
	    indentation relative to each template's first line reaches the output.

	    Args:
	        release_dir: Root directory of the release bundle.
	        release_id: Release identifier recorded in ``chat_config.json``.
	        model_id: Runtime model id (also used as display name and in the prompt).
	        asset_class: Asset class of the agent, or ``None`` for a generic one.
	        role: Role name of the agent, or ``None`` for a generic one.
	        base_model_id: Base model the adapter applies to. It is written to the
	            chat config and is also the base model used by the generated merge
	            tool and its launcher.
	    """
	    runtime = release_dir / "runtime"
	    role_label = role_title(role)
	    domain = asset_class.replace("_", " ") if asset_class else "financial"
	    write_json(
	        runtime / "chat_config.json",
	        {
	            "release_id": release_id,
	            "model_id": model_id,
	            "display_name": model_id,
	            "asset_class": asset_class,
	            "role": role,
	            "base_model": base_model_id,
	            "adapter_dir": "../model/adapter",
	            "max_new_tokens": 120,
	            "temperature": 0.2,
	            "top_p": 0.9,
	            "system_prompt": (
	                f"You are {model_id}, the Linvest21_FinGPT {domain} {role_label} super-agent. "
	                "Answer in that specialist role. Give concise, factual, numerate answers, separate facts from inference, "
	                "state uncertainty when data is incomplete, and avoid personalized investment advice."
	            ),
	            "api": {
	                "host": "127.0.0.1",
	                "port": 8765,
	                "token_env_var": "LINVEST21_API_TOKEN",
	                "require_token": True,
	            },
	        },
	    )
	    write_text(
	        runtime / "requirements_cpu.txt",
	        """
	        torch
	        transformers
	        peft
	        accelerate
	        safetensors
	        huggingface_hub
	        """,
	    )
	    write_text(
	        runtime / "requirements_gpu_cuda121.txt",
	        """
	        --index-url https://download.pytorch.org/whl/cu121
	        torch
	        --extra-index-url https://pypi.org/simple
	        transformers
	        peft
	        accelerate
	        safetensors
	        huggingface_hub
	        """,
	    )
	    write_text(
	        runtime / "chat_console.py",
	        r'''
	        from __future__ import annotations

	        import argparse
	        import json
	        from pathlib import Path
	        import sys
	        import time


	        def status(message: str) -> None:
	            print(f"[Linvest21_FinGPT {time.strftime('%H:%M:%S')}] {message}", flush=True)


	        def load_config(path: Path) -> dict:
	            with path.open("r", encoding="utf-8") as handle:
	                config = json.load(handle)
	            base = path.parent
	            config["adapter_dir"] = str((base / config["adapter_dir"]).resolve())
	            return config


	        def build_prompt(tokenizer, messages: list[dict[str, str]]) -> str:
	            if getattr(tokenizer, "chat_template", None):
	                return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	            lines = [f"{item['role'].capitalize()}: {item['content']}" for item in messages]
	            lines.append("Assistant:")
	            return "\n".join(lines)


	        def main(argv: list[str] | None = None) -> int:
	            parser = argparse.ArgumentParser(description="Portable Linvest21_FinGPT chat console")
	            parser.add_argument("--config", default=str(Path(__file__).with_name("chat_config.json")))
	            parser.add_argument("--base-model")
	            parser.add_argument("--adapter-dir")
	            parser.add_argument("--max-new-tokens", type=int)
	            parser.add_argument("--temperature", type=float)
	            parser.add_argument("--top-p", type=float)
	            args = parser.parse_args(argv)

	            config = load_config(Path(args.config).resolve())
	            for key in ["base_model", "adapter_dir", "max_new_tokens", "temperature", "top_p"]:
	                value = getattr(args, key, None)
	                if value is not None:
	                    config[key] = value
	            model_id = config.get("model_id", config.get("release_id", "linvest21_fingpt"))

	            try:
	                import torch
	                from peft import PeftModel
	                from transformers import AutoModelForCausalLM, AutoTokenizer
	            except ImportError as exc:
	                raise SystemExit(f"Missing dependency: {exc.name}. Run install_cpu.bat or install_gpu_cuda121.bat first.") from exc

	            adapter_dir = Path(config["adapter_dir"]).resolve()
	            if not (adapter_dir / "adapter_model.safetensors").exists():
	                raise SystemExit(f"Adapter weights not found: {adapter_dir}")

	            device = "cuda" if torch.cuda.is_available() else "cpu"
	            dtype = torch.float16 if device == "cuda" else torch.float32
	            status(f"model_id={model_id}")
	            status(f"base_model={config['base_model']}")
	            status(f"adapter_dir={adapter_dir}")
	            status(f"device={device}")
	            if device == "cpu":
	                status("CPU inference can be slow for Llama-3-8B.")

	            tokenizer = AutoTokenizer.from_pretrained(config["base_model"], use_fast=True)
	            if tokenizer.pad_token_id is None:
	                tokenizer.pad_token = tokenizer.eos_token
	            model = AutoModelForCausalLM.from_pretrained(
	                config["base_model"],
	                torch_dtype=dtype,
	                low_cpu_mem_usage=True,
	            )
	            model = PeftModel.from_pretrained(model, str(adapter_dir))
	            model.eval()
	            model.to(device)
	            model.generation_config.max_length = None

	            messages = [{"role": "system", "content": config["system_prompt"]}]
	            status("ready. Type /clear to reset and /exit to quit.")
	            while True:
	                try:
	                    user_text = input("\nlinvest21> ").strip()
	                except (KeyboardInterrupt, EOFError):
	                    print()
	                    return 0
	                if not user_text:
	                    continue
	                if user_text.lower() in {"/exit", "exit", "quit", "/quit"}:
	                    return 0
	                if user_text.lower() == "/clear":
	                    messages = [{"role": "system", "content": config["system_prompt"]}]
	                    status("context cleared")
	                    continue
	                messages.append({"role": "user", "content": user_text})
	                inputs = tokenizer(build_prompt(tokenizer, messages), return_tensors="pt").to(device)
	                input_len = inputs["input_ids"].shape[-1]
	                status("generating")
	                with torch.no_grad():
	                    output = model.generate(
	                        **inputs,
	                        max_new_tokens=int(config["max_new_tokens"]),
	                        do_sample=float(config["temperature"]) > 0,
	                        temperature=float(config["temperature"]),
	                        top_p=float(config["top_p"]),
	                        pad_token_id=tokenizer.eos_token_id,
	                        eos_token_id=tokenizer.eos_token_id,
	                    )
	                answer = tokenizer.decode(output[0][input_len:], skip_special_tokens=True).strip() or "[empty response]"
	                print(f"\n{model_id}: {answer}")
	                messages.append({"role": "assistant", "content": answer})


	        if __name__ == "__main__":
	            raise SystemExit(main())
	        ''',
	    )
	    write_text(
	        runtime / "install_cpu.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        "%PYTHON_EXE%" -m pip install -r "%~dp0requirements_cpu.txt"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "install_gpu_cuda121.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        "%PYTHON_EXE%" -m pip install -r "%~dp0requirements_gpu_cuda121.txt"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "run_chat_cpu.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        set "CUDA_VISIBLE_DEVICES="
	        cd /d "%~dp0"
	        "%PYTHON_EXE%" chat_console.py --config "%~dp0chat_config.json"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "run_chat_gpu.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        cd /d "%~dp0"
	        "%PYTHON_EXE%" chat_console.py --config "%~dp0chat_config.json"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "run_api_cpu.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        set "CUDA_VISIBLE_DEVICES="
	        if "%LINVEST21_API_TOKEN%"=="" (
	            echo [SHFT API] LINVEST21_API_TOKEN is not set. Set it in this shell before starting the API.
	            exit /b 2
	        )
	        cd /d "%~dp0"
	        "%PYTHON_EXE%" serve_api.py --config "%~dp0chat_config.json"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "run_api_gpu.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        if "%LINVEST21_API_TOKEN%"=="" (
	            echo [SHFT API] LINVEST21_API_TOKEN is not set. Set it in this shell before starting the API.
	            exit /b 2
	        )
	        cd /d "%~dp0"
	        "%PYTHON_EXE%" serve_api.py --config "%~dp0chat_config.json"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "run_api_self_test.bat",
	        r"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        cd /d "%~dp0"
	        "%PYTHON_EXE%" self_test_api_contract.py --config "%~dp0chat_config.json"
	        endlocal
	        """,
	    )
	    write_text(
	        runtime / "serve_api.py",
	        r'''
	        from __future__ import annotations

	        import argparse
	        import hmac
	        import json
	        import os
	        from pathlib import Path
	        from http.server import BaseHTTPRequestHandler, HTTPServer
	        import time
	        import uuid


	        def load_config(path: Path) -> dict:
	            with path.open("r", encoding="utf-8") as handle:
	                config = json.load(handle)
	            base = path.parent
	            config["adapter_dir"] = str((base / config["adapter_dir"]).resolve())
	            return config


	        def build_prompt(tokenizer, messages: list[dict[str, str]]) -> str:
	            if getattr(tokenizer, "chat_template", None):
	                return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	            lines = [f"{item['role'].capitalize()}: {item['content']}" for item in messages]
	            lines.append("Assistant:")
	            return "\n".join(lines)


	        class Linvest21Model:
	            def __init__(self, config: dict):
	                self.config = config
	                self.base_model_id = config["base_model"]
	                self.model_id = config.get("model_id", config.get("release_id", "linvest21_fingpt"))
	                self.adapter_dir = Path(config["adapter_dir"]).resolve()
	                self.device = "cpu"
	                self.tokenizer = None
	                self.model = None

	            def load(self) -> None:
	                try:
	                    import torch
	                    from peft import PeftModel
	                    from transformers import AutoModelForCausalLM, AutoTokenizer
	                except ImportError as exc:
	                    raise SystemExit(f"Missing dependency: {exc.name}. Run install_cpu.bat or install_gpu_cuda121.bat first.") from exc

	                self.device = "cuda" if torch.cuda.is_available() else "cpu"
	                dtype = torch.float16 if self.device == "cuda" else torch.float32
	                print(f"[SHFT API] loading model_id={self.model_id}", flush=True)
	                print(f"[SHFT API] loading base_model={self.base_model_id}", flush=True)
	                tokenizer = AutoTokenizer.from_pretrained(self.base_model_id, use_fast=True)
	                if tokenizer.pad_token_id is None:
	                    tokenizer.pad_token = tokenizer.eos_token
	                model = AutoModelForCausalLM.from_pretrained(self.base_model_id, torch_dtype=dtype, low_cpu_mem_usage=True)
	                if (self.adapter_dir / "adapter_model.safetensors").exists():
	                    print(f"[SHFT API] loading adapter={self.adapter_dir}", flush=True)
	                    model = PeftModel.from_pretrained(model, str(self.adapter_dir))
	                model.eval()
	                model.to(self.device)
	                model.generation_config.max_length = None
	                self.tokenizer = tokenizer
	                self.model = model
	                print(f"[SHFT API] ready device={self.device}", flush=True)

	            def generate(self, messages: list[dict[str, str]], *, max_new_tokens: int | None = None, temperature: float | None = None, top_p: float | None = None) -> dict:
	                if self.model is None or self.tokenizer is None:
	                    raise RuntimeError("model is not loaded")
	                import torch

	                if not messages or messages[0].get("role") != "system":
	                    messages = [{"role": "system", "content": self.config["system_prompt"]}, *messages]
	                max_new_tokens = int(max_new_tokens or self.config.get("max_new_tokens", 120))
	                temperature = float(self.config.get("temperature", 0.2) if temperature is None else temperature)
	                top_p = float(self.config.get("top_p", 0.9) if top_p is None else top_p)
	                inputs = self.tokenizer(build_prompt(self.tokenizer, messages), return_tensors="pt").to(self.device)
	                input_len = inputs["input_ids"].shape[-1]
	                started = time.time()
	                with torch.no_grad():
	                    output = self.model.generate(
	                        **inputs,
	                        max_new_tokens=max_new_tokens,
	                        do_sample=temperature > 0,
	                        temperature=temperature,
	                        top_p=top_p,
	                        pad_token_id=self.tokenizer.eos_token_id,
	                        eos_token_id=self.tokenizer.eos_token_id,
	                    )
	                answer = self.tokenizer.decode(output[0][input_len:], skip_special_tokens=True).strip() or "[empty response]"
	                output_len = output.shape[-1] - input_len
	                return {
	                    "content": answer,
	                    "usage": {
	                        "prompt_tokens": int(input_len),
	                        "completion_tokens": int(output_len),
	                        "total_tokens": int(output.shape[-1]),
	                    },
	                    "latency_ms": round((time.time() - started) * 1000, 2),
	                }


	        def make_handler(engine: Linvest21Model, api_token: str | None, require_token: bool):
	            class Handler(BaseHTTPRequestHandler):
	                server_version = "Linvest21FinGPTAPI/1.0"

	                def _send_json(self, code: int, payload: dict) -> None:
	                    body = json.dumps(payload).encode("utf-8")
	                    self.send_response(code)
	                    self.send_header("Content-Type", "application/json")
	                    self.send_header("Content-Length", str(len(body)))
	                    self.end_headers()
	                    self.wfile.write(body)

	                def _authorized(self) -> bool:
	                    if not require_token:
	                        return True
	                    if not api_token:
	                        return False
	                    header = self.headers.get("Authorization", "")
	                    expected = f"Bearer {api_token}"
	                    # Constant-time comparison: response timing must not reveal how much of the token matched.
	                    return hmac.compare_digest(header.encode("utf-8"), expected.encode("utf-8"))

	                def _read_json(self) -> dict:
	                    length = int(self.headers.get("Content-Length", "0"))
	                    if length <= 0:
	                        return {}
	                    return json.loads(self.rfile.read(length).decode("utf-8"))

	                def do_GET(self):
	                    if self.path == "/health":
	                        self._send_json(200, {"ok": True, "service": "linvest21_fingpt", "model": engine.model_id, "base_model": engine.base_model_id, "device": engine.device, "auth_required": require_token})
	                        return
	                    if self.path == "/v1/models":
	                        if not self._authorized():
	                            self._send_json(401, {"error": {"message": "missing or invalid bearer token"}})
	                            return
	                        self._send_json(200, {"object": "list", "data": [{"id": engine.model_id, "object": "model", "base_model": engine.base_model_id, "asset_class": engine.config.get("asset_class"), "role": engine.config.get("role")}]})
	                        return
	                    self._send_json(404, {"error": {"message": "not found"}})

	                def do_POST(self):
	                    if self.path not in {"/v1/chat/completions", "/v1/generate"}:
	                        self._send_json(404, {"error": {"message": "not found"}})
	                        return
	                    if not self._authorized():
	                        self._send_json(401, {"error": {"message": "missing or invalid bearer token"}})
	                        return
	                    try:
	                        payload = self._read_json()
	                        if self.path == "/v1/generate":
	                            prompt = str(payload.get("prompt", ""))
	                            messages = [{"role": "user", "content": prompt}]
	                        else:
	                            messages = payload.get("messages", [])
	                        if not isinstance(messages, list):
	                            raise ValueError("messages must be a list")
	                        result = engine.generate(
	                            messages,
	                            max_new_tokens=payload.get("max_new_tokens"),
	                            temperature=payload.get("temperature"),
	                            top_p=payload.get("top_p"),
	                        )
	                    except Exception as exc:
	                        self._send_json(400, {"error": {"message": str(exc)}})
	                        return

	                    response = {
	                        "id": f"chatcmpl-{uuid.uuid4().hex}",
	                        "object": "chat.completion",
	                        "created": int(time.time()),
	                        "model": engine.model_id,
	                        "choices": [{"index": 0, "message": {"role": "assistant", "content": result["content"]}, "finish_reason": "stop"}],
	                        "usage": result["usage"],
	                        "latency_ms": result["latency_ms"],
	                    }
	                    self._send_json(200, response)

	                def log_message(self, fmt: str, *args) -> None:
	                    print(f"[SHFT API] {self.address_string()} {fmt % args}", flush=True)

	            return Handler


	        if __name__ == "__main__":
	            parser = argparse.ArgumentParser(description="Token-protected JSON API for portable Linvest21_FinGPT.")
	            parser.add_argument("--config", default=str(Path(__file__).with_name("chat_config.json")))
	            parser.add_argument("--host")
	            parser.add_argument("--port", type=int)
	            parser.add_argument("--allow-no-token", action="store_true", help="Local development only: disable bearer-token requirement.")
	            args = parser.parse_args()
	            config = load_config(Path(args.config).resolve())
	            api_config = config.get("api", {})
	            host = args.host or api_config.get("host", "127.0.0.1")
	            port = int(args.port or api_config.get("port", 8765))
	            token_env_var = api_config.get("token_env_var", "LINVEST21_API_TOKEN")
	            token = os.environ.get(token_env_var)
	            require_token = bool(api_config.get("require_token", True)) and not args.allow_no_token
	            if require_token and not token:
	                raise SystemExit(f"{token_env_var} is required. Set it in the shell; do not store it in files.")
	            engine = Linvest21Model(config)
	            engine.load()
	            print(f"[SHFT API] serving http://{host}:{port} auth_required={require_token}", flush=True)
	            HTTPServer((host, port), make_handler(engine, token, require_token)).serve_forever()
	        ''',
	    )
	    write_text(
	        runtime / "self_test_api_contract.py",
	        r'''
	        from __future__ import annotations

	        import argparse
	        import json
	        from pathlib import Path


	        REQUIRED_ENDPOINTS = [
	            "GET /health",
	            "GET /v1/models",
	            "POST /v1/chat/completions",
	            "POST /v1/generate",
	        ]


	        def main() -> int:
	            parser = argparse.ArgumentParser(description="Self-test the portable Linvest21_FinGPT JSON API contract without loading model weights.")
	            parser.add_argument("--config", default=str(Path(__file__).with_name("chat_config.json")))
	            args = parser.parse_args()
	            runtime_dir = Path(args.config).resolve().parent
	            config = json.loads(Path(args.config).read_text(encoding="utf-8"))
	            api = config.get("api", {})
	            files = {
	                "serve_api": runtime_dir / "serve_api.py",
	                "run_api_cpu": runtime_dir / "run_api_cpu.bat",
	                "run_api_gpu": runtime_dir / "run_api_gpu.bat",
	                "chat_config": Path(args.config).resolve(),
	            }
	            errors: list[str] = []
	            for name, path in files.items():
	                if not path.exists():
	                    errors.append(f"missing {name}: {path}")
	            if api.get("require_token") is not True:
	                errors.append("chat_config api.require_token must be true")
	            if api.get("token_env_var") != "LINVEST21_API_TOKEN":
	                errors.append("chat_config api.token_env_var must be LINVEST21_API_TOKEN")
	            source = files["serve_api"].read_text(encoding="utf-8") if files["serve_api"].exists() else ""
	            for marker in ["/health", "/v1/models", "/v1/chat/completions", "/v1/generate", "Authorization", "Bearer"]:
	                if marker not in source:
	                    errors.append(f"serve_api.py missing marker: {marker}")
	            report = {
	                "ok": not errors,
	                "errors": errors,
	                "api": {
	                    "host": api.get("host"),
	                    "port": api.get("port"),
	                    "require_token": api.get("require_token"),
	                    "token_env_var": api.get("token_env_var"),
	                    "endpoints": REQUIRED_ENDPOINTS,
	                    "json_support": True,
	                    "token_storage_policy": "runtime_environment_only",
	                },
	                "files": {name: str(path) for name, path in files.items()},
	            }
	            print(json.dumps(report, indent=2))
	            return 0 if report["ok"] else 3


	        if __name__ == "__main__":
	            raise SystemExit(main())
	        ''',
	    )
	    tools = release_dir / "tools"
	    # The merge tool must default to THIS release's base model, not a fixed one.
	    # The template is full of f-string braces, so a placeholder is substituted
	    # instead of turning the template into an f-string. json.dumps() yields a
	    # double-quoted literal that is also valid Python for ordinary model ids.
	    merge_tool_source = r'''
	        from __future__ import annotations

	        import argparse
	        from pathlib import Path


	        def main() -> int:
	            parser = argparse.ArgumentParser(description="Merge a base HF model and Linvest21 LoRA adapter into a full HF model directory.")
	            parser.add_argument("--base-model", default=__BASE_MODEL_LITERAL__)
	            parser.add_argument("--adapter-dir", default=str(Path(__file__).resolve().parents[1] / "model" / "adapter"))
	            parser.add_argument("--output-dir", default=str(Path(__file__).resolve().parents[1] / "model" / "merged_hf"))
	            parser.add_argument("--dtype", choices=["float16", "bfloat16", "float32"], default="float16")
	            args = parser.parse_args()

	            import torch
	            from peft import PeftModel
	            from transformers import AutoModelForCausalLM, AutoTokenizer

	            dtype = {"float16": torch.float16, "bfloat16": torch.bfloat16, "float32": torch.float32}[args.dtype]
	            output_dir = Path(args.output_dir).resolve()
	            output_dir.mkdir(parents=True, exist_ok=True)
	            print(f"[SHFT merge] loading base={args.base_model}")
	            base = AutoModelForCausalLM.from_pretrained(args.base_model, torch_dtype=dtype, low_cpu_mem_usage=True)
	            print(f"[SHFT merge] loading adapter={args.adapter_dir}")
	            model = PeftModel.from_pretrained(base, args.adapter_dir)
	            print("[SHFT merge] merging adapter into base model")
	            merged = model.merge_and_unload()
	            print(f"[SHFT merge] saving merged model to {output_dir}")
	            merged.save_pretrained(output_dir, safe_serialization=True)
	            tokenizer = AutoTokenizer.from_pretrained(args.base_model, use_fast=True)
	            tokenizer.save_pretrained(output_dir)
	            print("[SHFT merge] completed")
	            return 0


	        if __name__ == "__main__":
	            raise SystemExit(main())
	        '''.replace("__BASE_MODEL_LITERAL__", json.dumps(base_model_id))
	    write_text(tools / "merge_hf_model.py", merge_tool_source)
	    # Raw f-string: backslashes stay literal for the batch file while the base
	    # model id of this release is interpolated (quoted, in case it is a path).
	    write_text(
	        tools / "run_merge_hf_model.bat",
	        rf"""
	        @echo off
	        setlocal
	        set "PYTHON_EXE=python"
	        if exist "%USERPROFILE%\miniconda3\python.exe" set "PYTHON_EXE=%USERPROFILE%\miniconda3\python.exe"
	        "%PYTHON_EXE%" "%~dp0merge_hf_model.py" --base-model "{base_model_id}" --adapter-dir "%~dp0..\model\adapter" --output-dir "%~dp0..\model\merged_hf"
	        endlocal
	        """,
	    )
	    write_text(
	        tools / "EXPORT_MODES.md",
	        """
	        # Export Modes

	        ## adapter_only

	        Includes the trained LoRA adapter and runtime. This is the default small portable bundle. It still needs the target machine to download or cache the gated base model.

	        ## merged_hf

	        Includes a full Hugging Face model directory under `model/merged_hf` when provided during export or produced with `tools/run_merge_hf_model.bat`.

	        This mode is easier to run on another machine, but the merged model contains base-model weights and must only be distributed under the applicable Meta Llama and upstream adapter license terms.

	        ## quantized_gguf

	        Includes a `model/gguf/*.gguf` file when provided during export. This is the most portable CPU/RAM path for llama.cpp-style inference.

	        Produce it from `model/merged_hf` with llama.cpp conversion and quantization tools on a machine with enough disk and memory.
	        """,
	    )


	def write_release_docs(
	    release_dir: Path,
	    release_id: str,
	    export_mode: str,
	    source_run_id: str,
	    base_model_id: str,
	    *,
	    model_id: str,
	    asset_class: str | None,
	    role: str | None,
	) -> None:
	    """Write the human-readable documentation of a release bundle.

	    Creates ``README.md`` in ``release_dir`` plus ``docs/SECURITY.md`` and
	    ``docs/MODEL_CARD.md``. Templates pass through write_text(), which dedents
	    them, so their indentation inside this function is cosmetic.

	    Args:
	        release_dir: Root directory of the release bundle.
	        release_id: Release identifier shown in the README and model card.
	        export_mode: Export mode string shown in the README.
	        source_run_id: Run id the adapter was copied from.
	        base_model_id: Base model the adapter applies to; named in all three
	            documents.
	        model_id: Runtime model id.
	        asset_class: Asset class, rendered as ``unspecified`` when missing.
	        role: Role name, rendered as ``unspecified`` when missing.
	    """
	    write_text(
	        release_dir / "README.md",
	        f"""
	        # Linvest21 FinGPT Release Bundle: {release_id}

	        This directory is a portable SHFT implementation release bundle.

	        Runtime model ID:

	        ```text
	        {model_id}
	        ```

	        Asset class: `{asset_class or "unspecified"}`

	        Role: `{role or "unspecified"}`

	        It can be produced and certified from the source workspace with:

	        ```powershell
	        .\\impl_codex\\scripts\\run_shft_0_to_16_sample_to_implementation.bat
	        ```

	        ## What Is Included

	        - `model/adapter`: the trained Linvest21 LoRA adapter copied from `{source_run_id}`.
	        - `runtime`: portable Windows chat/API scripts and dependency manifests.
	        - `runtime/serve_api.py`: token-protected JSON API server.
	        - `runtime/self_test_api_contract.py`: API contract verifier that does not load model weights.
	        - `evidence`: copied evaluation, training, and candidate evidence.
	        - `release_manifest.json`: machine-readable release metadata.
	        - `release_hashes.json`: SHA-256 hashes for release files.

	        ## Export Mode

	        ```text
	        {export_mode}
	        ```

	        `adapter_only` is small and portable, but still requires access to the gated base model:

	        ```text
	        {base_model_id}
	        ```

	        ## Run

	        First install dependencies:

	        ```powershell
	        .\\runtime\\install_cpu.bat
	        ```

	        Or, on a CUDA machine:

	        ```powershell
	        .\\runtime\\install_gpu_cuda121.bat
	        ```

	        Then start chat:

	        ```powershell
	        .\\runtime\\run_chat_cpu.bat
	        ```

	        Or:

	        ```powershell
	        .\\runtime\\run_chat_gpu.bat
	        ```

	        ## JSON API

	        Set a local API token in the shell, then start the API:

	        ```powershell
	        $env:LINVEST21_API_TOKEN = "<your-local-api-token>"
	        .\\runtime\\run_api_cpu.bat
	        ```

	        Endpoints:

	        ```text
	        GET /health
	        GET /v1/models Authorization: Bearer <token>
	        POST /v1/chat/completions Authorization: Bearer <token>
	        POST /v1/generate Authorization: Bearer <token>
	        ```

	        Tokens are never stored in the release bundle or implementation config.

	        Self-test the API contract without loading model weights:

	        ```powershell
	        .\\runtime\\run_api_self_test.bat
	        ```

	        From the SHFT source workspace, certify the generated portable bundle before copying it to another machine:

	        ```powershell
	        .\\impl_codex\\scripts\\self_certify_portable_api.bat {release_id}
	        ```

	        ## Security

	        Do not place Hugging Face tokens in this directory. Set `HF_TOKEN` in the target machine environment or authenticate with:

	        ```powershell
	        hf auth login
	        ```

	        ## License Boundary

	        This bundle includes the Linvest21 adapter. The base model remains governed by the Meta Llama license and Hugging Face gated-access controls unless a future `merged_hf` or `quantized_gguf` export is produced and approved for distribution.
	        """,
	    )
	    # f-string so the access check names the base model this release was
	    # actually built on rather than a fixed model id.
	    write_text(
	        release_dir / "docs" / "SECURITY.md",
	        f"""
	        # Security Notes

	        - Real provider tokens must not be committed or copied into this release.
	        - Use `HF_TOKEN` or Hugging Face CLI login on the target host.
	        - Confirm the target host is authorized for `{base_model_id}`.
	        - Do not distribute merged or quantized base-model weights unless license and access policy permit it.
	        - Treat `evidence/paired_predictions.jsonl` as potentially sensitive model-output evidence.
	        """,
	    )
	    write_text(
	        release_dir / "docs" / "MODEL_CARD.md",
	        f"""
	        # Linvest21_FinGPT Model Card

	        Release: `{release_id}`

	        Runtime model ID: `{model_id}`

	        Asset class: `{asset_class or "unspecified"}`

	        Role: `{role or "unspecified"}`

	        Base model: `{base_model_id}`

	        Adapter source run: `{source_run_id}`

	        Intended use: internal Linvest21 financial-analysis experimentation and SHFT lifecycle validation for the named super-agent role.

	        Current status: exported and locally self-certified. Production promotion is controlled separately by SHFT promotion evidence and approvals.

	        Known limitation: local CPU inference is slow for Llama-3-8B. This adapter-only bundle still requires authorized access to the base model.
	        """,
	    )


	def copy_evidence(
	    release_dir: Path,
	    source_run_id: str,
	    *,
	    smoke_eval_run_id: str = "run_step21_repair_smoke_eval_001",
	    full_eval_run_id: str = "run_step20_repair_eval_001",
	) -> dict[str, bool]:
	    """Copy training and evaluation evidence into ``release_dir / "evidence"``.

	    Every item is optional: a missing source file is skipped and reported as
	    ``False`` rather than raising.

	    Args:
	        release_dir: Root directory of the release bundle.
	        source_run_id: Training run whose candidate manifest, training result
	            and training plan are copied.
	        smoke_eval_run_id: Run providing the smoke evaluation manifest and
	            paired report; defaults to the previously hard-coded run.
	        full_eval_run_id: Run providing the full evaluation manifest and paired
	            report; defaults to the previously hard-coded run.

	    Returns:
	        A dict mapping each evidence label to whether its file was copied.
	    """
	    evidence = release_dir / "evidence"
	    runs_root = SHFT_WORKSPACE_ROOT / "runs"
	    source_run = runs_root / source_run_id
	    training_artifacts = source_run / "remote_artifacts"
	    copied = {
	        "candidate_manifest": copy_if_exists(source_run / "candidate_manifest.json", evidence / "candidate_manifest.json"),
	        "training_result": copy_if_exists(training_artifacts / "training_result.json", evidence / "training_result.json"),
	        "training_plan": copy_if_exists(training_artifacts / "training_plan.json", evidence / "training_plan.json"),
	    }
	    # Evaluation evidence lands in a sub-directory named after its run so the
	    # smoke and full reports (same file names) cannot overwrite each other.
	    for label, run_id in (("smoke_eval", smoke_eval_run_id), ("full_eval", full_eval_run_id)):
	        run_dir = runs_root / run_id
	        copied[f"{label}_manifest"] = copy_if_exists(
	            run_dir / "evaluation_manifest.json",
	            evidence / run_id / "evaluation_manifest.json",
	        )
	        copied[f"{label}_report"] = copy_if_exists(
	            run_dir / "remote_artifacts" / "eval" / "paired_eval_report.json",
	            evidence / run_id / "paired_eval_report.json",
	        )
	    return copied


	def export_release(
	*,
	release_id: str,
	source_run_id: str = DEFAULT_SOURCE_RUN_ID,
	export_mode: str = "adapter_only",
	base_model_id: str = DEFAULT_BASE_MODEL,
	model_id: str \| None = None,
	asset_class: str \| None = None,
	role: str \| None = None,
	merged_model_dir: str \| None = None,
	gguf_model_path: str \| None = None,
	zip_release: bool = False,
	) -> dict[str, Any]:
	if export_mode not in {"adapter_only", "merged_hf", "quantized_gguf"}:
	raise ValueError(f"unsupported export mode: {export_mode}")

	release_dir = product_dir_for(release_id)
	if release_dir.exists():
	shutil.rmtree(release_dir)
	release_dir.mkdir(parents=True, exist_ok=True)

	source_run = SHFT_WORKSPACE_ROOT / "runs" / source_run_id
	adapter_src = source_run / "remote_artifacts" / "adapter"
	if not adapter_src.exists():
	raise FileNotFoundError(f"adapter source not found: {adapter_src}")

	identity = infer_release_identity(release_id, model_id=model_id, asset_class=asset_class, role=role)
	runtime_model_id = str(identity["model_id"] or release_id)
	runtime_asset_class = identity["asset_class"]
	runtime_role = identity["role"]

	copy_tree(adapter_src, release_dir / "model" / "adapter")
	copied_merged_model = False
	copied_gguf_model = False
	if merged_model_dir:
	copy_tree(Path(merged_model_dir), release_dir / "model" / "merged_hf")
	copied_merged_model = True
	if gguf_model_path:
	gguf_src = Path(gguf_model_path)
	if not gguf_src.exists():
	raise FileNotFoundError(f"GGUF model not found: {gguf_src}")
	gguf_dst = release_dir / "model" / "gguf" / gguf_src.name
	gguf_dst.parent.mkdir(parents=True, exist_ok=True)
	shutil.copy2(gguf_src, gguf_dst)
	copied_gguf_model = True
	write_runtime_files(
	release_dir,
	release_id=release_id,
	model_id=runtime_model_id,
	asset_class=runtime_asset_class,
	role=runtime_role,
	base_model_id=base_model_id,
	)
	evidence = copy_evidence(release_dir, source_run_id)
	write_release_docs(
	release_dir,
	release_id,
	export_mode,
	source_run_id,
	base_model_id,
	model_id=runtime_model_id,
	asset_class=runtime_asset_class,
	role=runtime_role,
	)

	model_payload: dict[str, Any] = {
	"adapter_path": "model/adapter",
	"base_model_id": base_model_id,
	"export_mode": export_mode,
	"self_containment_level": "adapter_plus_runtime",
	"requires_base_model_download": True,
	}
	if export_mode == "merged_hf":
	status = "completed" if copied_merged_model else "ready_to_merge"
	model_payload.update(
	{
	"merged_model_path": "model/merged_hf" if copied_merged_model else None,
	"self_containment_level": "full_hf_model" if copied_merged_model else "adapter_plus_merge_tool",
	"requires_base_model_download": False,
	"status": status,
	"note": "Merged model copied into release." if copied_merged_model else "Run tools/run_merge_hf_model.bat on a licensed high-RAM host to create model/merged_hf.",
	}
	)
	elif export_mode == "quantized_gguf":
	status = "completed" if copied_gguf_model else "ready_for_conversion"
	model_payload.update(
	{
	"gguf_model_path": f"model/gguf/{Path(gguf_model_path).name}" if gguf_model_path else None,
	"self_containment_level": "full_quantized_gguf" if copied_gguf_model else "adapter_plus_conversion_tooling",
	"requires_base_model_download": False,
	"status": status,
	"note": "GGUF model copied into release." if copied_gguf_model else "Create model/merged_hf, then use llama.cpp conversion and quantization tools.",
	}
	)

	manifest = {
	"release_id": release_id,
	"model_id": runtime_model_id,
	"asset_class": runtime_asset_class,
	"role": runtime_role,
	"created_at": utc_now(),
	"source_run_id": source_run_id,
	"source_run_dir": str(source_run.relative_to(REPO_ROOT)).replace("\\", "/"),
	"product_root": str(IMPLEMENTATION_PRODUCTS_ROOT.relative_to(REPO_ROOT)).replace("\\", "/"),
	"release_dir": str(release_dir.relative_to(REPO_ROOT)).replace("\\", "/"),
	"path_policy": {
	"write_target": "impl_codex/implementation_products/<model_id>",
	"legacy_write_enabled": False,
	},
	"model": model_payload,
	"runtime": {
	"chat_console": "runtime/chat_console.py",
	"serve_api": "runtime/serve_api.py",
	"api_self_test": "runtime/self_test_api_contract.py",
	"run_chat_cpu": "runtime/run_chat_cpu.bat",
	"run_chat_gpu": "runtime/run_chat_gpu.bat",
	"run_api_cpu": "runtime/run_api_cpu.bat",
	"run_api_gpu": "runtime/run_api_gpu.bat",
	"run_api_self_test": "runtime/run_api_self_test.bat",
	"config": "runtime/chat_config.json",
	"requirements_cpu": "runtime/requirements_cpu.txt",
	"requirements_gpu_cuda121": "runtime/requirements_gpu_cuda121.txt",
	},
	"evidence_copied": evidence,
	"promotion_status": "not_promoted",
	"distribution_policy": {
	"adapter_only_distribution": "allowed_with_internal_policy",
	"merged_or_quantized_distribution": "requires_license_and_access_review",
	"tokens_in_bundle": "forbidden",
	},
	"heavy_assets": {
	"merged_model_copied": copied_merged_model,
	"gguf_model_copied": copied_gguf_model,
	},
	}
	write_json(release_dir / "release_manifest.json", manifest)
	write_json(release_dir / "release_hashes.json", collect_hashes(release_dir))

	archive_path = None
	if zip_release:
	archive_path = shutil.make_archive(str(release_dir), "zip", root_dir=release_dir)
	result = {
	"status": "completed",
	"release_id": release_id,
	"product_dir": str(release_dir),
	"release_dir": str(release_dir),
	"export_mode": export_mode,
	"archive": archive_path,
	"manifest": str(release_dir / "release_manifest.json"),
	"path_policy": "exports write to impl_codex/implementation_products only",
	}
	write_json(SHFT_WORKSPACE_ROOT / "registry" / "releases" / f"{release_id}.json", result)
	return result


	def _read_json_object(path: Path) -> dict[str, Any]:
	    """Parse *path* as UTF-8 JSON and return it only if it is a JSON object.

	    Raises:
	        OSError: the file cannot be read (missing, a directory, no permission).
	        ValueError: the content is not valid UTF-8/JSON, or is valid JSON of
	            another type (``json.JSONDecodeError`` and ``UnicodeDecodeError``
	            are both ``ValueError`` subclasses).
	    """
	    payload = json.loads(path.read_text(encoding="utf-8"))
	    if not isinstance(payload, dict):
	        raise ValueError(f"expected a JSON object, found {type(payload).__name__}")
	    return payload


	def validate_release(release_id: str) -> dict[str, Any]:
	    """Self-test a packaged release and persist the resulting report.

	    Checks, in order: presence of the required release files, consistency of
	    ``release_manifest.json``, the SHA-256 digests recorded in
	    ``release_hashes.json``, and the integrity of the sibling ``.zip`` archive
	    when one exists.

	    Args:
	        release_id: Identifier of the release; resolved to a directory through
	            ``find_release_dir``.

	    Returns:
	        A report dict. ``ok`` is true only when no errors were collected.
	        If the release directory does not exist, a short report is returned
	        immediately and nothing is written to disk. Otherwise the full report
	        is written to ``self_test_report.json`` inside the release directory
	        and to ``registry/releases/<release_id>_self_test.json`` under the
	        workspace root before being returned.
	    """
	    release_dir, storage_location = find_release_dir(release_id)
	    errors: list[str] = []
	    warnings: list[str] = []
	    if not release_dir.exists():
	        return {"ok": False, "release_id": release_id, "errors": [f"release directory not found: {release_dir}"], "warnings": []}

	    required_files = [
	        "README.md",
	        "release_manifest.json",
	        "release_hashes.json",
	        "model/adapter/adapter_config.json",
	        "model/adapter/adapter_model.safetensors",
	        "runtime/chat_config.json",
	        "runtime/chat_console.py",
	        "runtime/serve_api.py",
	        "runtime/self_test_api_contract.py",
	        "runtime/run_chat_cpu.bat",
	        "runtime/run_chat_gpu.bat",
	        "runtime/run_api_cpu.bat",
	        "runtime/run_api_gpu.bat",
	        "runtime/run_api_self_test.bat",
	        "runtime/install_cpu.bat",
	        "runtime/install_gpu_cuda121.bat",
	        "tools/merge_hf_model.py",
	        "tools/run_merge_hf_model.bat",
	        "tools/EXPORT_MODES.md",
	        "docs/MODEL_CARD.md",
	        "docs/SECURITY.md",
	    ]
	    errors.extend(
	        f"missing required release file: {rel}"
	        for rel in required_files
	        if not (release_dir / rel).exists()
	    )

	    # Manifest checks. Unreadable, malformed or non-object content becomes a
	    # validation error instead of an exception escaping the validator.
	    try:
	        manifest = _read_json_object(release_dir / "release_manifest.json")
	    except (OSError, ValueError) as exc:
	        errors.append(f"release_manifest.json is invalid: {exc}")
	    else:
	        if manifest.get("release_id") != release_id:
	            errors.append("release manifest id does not match release directory")
	        # A non-object section is treated like a missing one so the checks
	        # below report their usual errors rather than raising AttributeError.
	        model = manifest.get("model")
	        if not isinstance(model, dict):
	            model = {}
	        if model.get("export_mode") == "adapter_only" and not model.get("requires_base_model_download"):
	            errors.append("adapter_only release must record that base model download/cache is required")
	        policy = manifest.get("distribution_policy")
	        if not isinstance(policy, dict):
	            policy = {}
	        if policy.get("tokens_in_bundle") != "forbidden":
	            errors.append("release distribution policy must forbid tokens in bundle")

	    # Digest checks against the recorded hash table.
	    try:
	        stored_hashes = _read_json_object(release_dir / "release_hashes.json")
	    except (OSError, ValueError) as exc:
	        stored_hashes = {}
	        errors.append(f"release_hashes.json is invalid: {exc}")
	    for rel, expected in stored_hashes.items():
	        path = release_dir / rel
	        # is_file() rather than exists(): a directory sitting at a hashed path
	        # cannot be hashed and is reported as a missing file.
	        if not path.is_file():
	            errors.append(f"hashed file missing: {rel}")
	            continue
	        if sha256_file(path).lower() != str(expected).lower():
	            errors.append(f"hash mismatch for {rel}")

	    # Files present on disk but absent from the hash table only produce a
	    # warning. The hash file itself and the self-test report written below
	    # are excluded from this comparison.
	    current_files = {
	        str(path.relative_to(release_dir)).replace("\\", "/")
	        for path in release_dir.rglob("*")
	        if path.is_file() and path.name not in {"release_hashes.json", "self_test_report.json"}
	    }
	    missing_hashes = sorted(current_files - set(stored_hashes))
	    if missing_hashes:
	        warnings.append(f"files not listed in release_hashes.json: {missing_hashes}")

	    # The archive is named "<release dir name>.zip". Append the extension to
	    # the full name: with_suffix() would replace everything after the last dot,
	    # so an id such as "model_v1.2" would be looked up as "model_v1.zip".
	    archive_path = release_dir.with_name(f"{release_dir.name}.zip")
	    archive_ok = False
	    if archive_path.exists():
	        try:
	            with zipfile.ZipFile(archive_path) as archive:
	                bad = archive.testzip()
	            if bad:
	                errors.append(f"zip archive has corrupt member: {bad}")
	            else:
	                archive_ok = True
	        except zipfile.BadZipFile as exc:
	            errors.append(f"zip archive is invalid: {exc}")
	    else:
	        warnings.append(f"release zip archive not found: {archive_path}")

	    report = {
	        "ok": not errors,
	        "release_id": release_id,
	        "storage_location": storage_location,
	        "product_dir": str(product_dir_for(release_id)),
	        "release_dir": str(release_dir),
	        "archive": str(archive_path),
	        "archive_ok": archive_ok,
	        "required_file_count": len(required_files),
	        "hashed_file_count": len(stored_hashes),
	        "errors": errors,
	        "warnings": warnings,
	        "validated_at": utc_now(),
	    }
	    write_json(release_dir / "self_test_report.json", report)
	    write_json(SHFT_WORKSPACE_ROOT / "registry" / "releases" / f"{release_id}_self_test.json", report)
	    return report

Xet Storage Details

Size: 47.9 kB
Xet hash: 14d804f9a51ff11d8cfb97e7bcdb37988aa7cf184f65173aa0ff260c0e4bd327

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.