""" paths.py — persistent storage layout on the Hugging Face storage bucket (/data). If /data exists and is writable (the Space has a storage bucket attached), everything that should survive restarts/sleep lives there: /data/cache_us market-data parquet cache (yfinance downloads) /data/output signal CSVs, holdings.txt /data/reports auto-generated research reports (markdown) /data/traces agent traces (JSON) — shareable on the Hub (📡 badge) /data/hf_cache GGUF model files (downloaded once, kept forever) /data/pylibs llama-cpp-python installed once at runtime, persisted Without a bucket the app still works — it just falls back to the container filesystem (wiped on restart). """ from __future__ import annotations import os import sys def _writable(p: str) -> bool: try: return os.path.isdir(p) and os.access(p, os.W_OK) except OSError: return False DATA_ROOT = "/data" if _writable("/data") else "." PERSISTENT = DATA_ROOT == "/data" CACHE_DIR = os.path.join(DATA_ROOT, "cache_us") OUTPUT_DIR = os.path.join(DATA_ROOT, "output") if PERSISTENT else "./_app_output" REPORTS_DIR = os.path.join(DATA_ROOT, "reports") TRACES_DIR = os.path.join(DATA_ROOT, "traces") DATASET_DIR = os.path.join(DATA_ROOT, "dataset") # SFT training pairs (JSONL) HF_CACHE_DIR = os.path.join(DATA_ROOT, "hf_cache") PYLIBS_DIR = os.path.join(DATA_ROOT, "pylibs") for _d in (CACHE_DIR, OUTPUT_DIR, REPORTS_DIR, TRACES_DIR, DATASET_DIR, HF_CACHE_DIR, PYLIBS_DIR): try: os.makedirs(_d, exist_ok=True) except OSError: pass # GGUF downloads (hf_hub_download) go to the bucket so models persist if PERSISTENT: os.environ.setdefault("HF_HOME", HF_CACHE_DIR) # llama-cpp-python installed into the bucket must be importable. # APPEND (not insert) so bucket-installed copies of common deps (numpy etc.) # can never shadow the system site-packages the app was built against. 
if PERSISTENT:
    if PYLIBS_DIR not in sys.path:
        # Lowest priority on the import path: appended, never inserted.
        sys.path.append(PYLIBS_DIR)


def storage_status() -> str:
    """Return a one-line Markdown message describing the active storage mode.

    Returns:
        A success message when the `/data` bucket is in use (``PERSISTENT`` is
        true), otherwise a warning that storage is ephemeral.
    """
    if not PERSISTENT:
        return ("⚠️ No `/data` bucket detected — running on ephemeral container "
                "storage (everything re-downloads after a restart).")
    return ("✅ Persistent storage bucket mounted at `/data` — data cache, "
            "model files, reports, traces and the llama.cpp runtime all "
            "survive restarts.")