Chan-Compass / paths.py
ranranrunforit's picture
Upload 25 files
16520a2 verified
Raw
History Blame Contribute Delete
2.45 kB
"""
paths.py — persistent storage layout on the Hugging Face storage bucket (/data).
If /data exists and is writable (the Space has a storage bucket attached),
everything that should survive restarts/sleep lives there:
    /data/cache_us   market-data parquet cache (yfinance downloads)
    /data/output     signal CSVs, holdings.txt
    /data/reports    auto-generated research reports (markdown)
    /data/traces     agent traces (JSON) — shareable on the Hub (📡 badge)
    /data/dataset    SFT training pairs (JSONL)
    /data/hf_cache   GGUF model files (downloaded once, kept forever)
    /data/pylibs     llama-cpp-python installed once at runtime, persisted
Without a bucket the app still works — it just falls back to the container
filesystem (wiped on restart).
"""
from __future__ import annotations
import os
import sys
def _writable(p: str) -> bool:
    """Return True if *p* is an existing directory that files can be created in.

    Creating an entry inside a directory needs both write and search
    (execute) permission on it, so both bits are checked; a write-only
    directory is reported as not writable.

    Args:
        p: Filesystem path to probe.

    Returns:
        True when *p* is a directory and the permission check succeeds,
        False otherwise (including when the probe itself raises OSError).
    """
    try:
        return os.path.isdir(p) and os.access(p, os.W_OK | os.X_OK)
    except OSError:
        return False
# The bucket counts as attached only when /data exists and is writable.
PERSISTENT = _writable("/data")

# Root of every storage path: the bucket, or the current working directory
# (container filesystem) when no bucket is attached.
DATA_ROOT = "/data" if PERSISTENT else "."

# Signal CSVs and holdings.txt; the fallback uses its own folder name.
if PERSISTENT:
    OUTPUT_DIR = os.path.join(DATA_ROOT, "output")
else:
    OUTPUT_DIR = "./_app_output"

CACHE_DIR = os.path.join(DATA_ROOT, "cache_us")     # market-data parquet cache
REPORTS_DIR = os.path.join(DATA_ROOT, "reports")    # auto-generated research reports
TRACES_DIR = os.path.join(DATA_ROOT, "traces")      # agent traces (JSON)
DATASET_DIR = os.path.join(DATA_ROOT, "dataset")    # SFT training pairs (JSONL)
HF_CACHE_DIR = os.path.join(DATA_ROOT, "hf_cache")  # GGUF model files
PYLIBS_DIR = os.path.join(DATA_ROOT, "pylibs")      # runtime-installed llama-cpp-python
# Create every storage directory up front. A failure is deliberately non-fatal
# (the app is meant to keep running without the bucket), but it is reported on
# stderr so that a later failed write can be traced back to its cause.
for _d in (CACHE_DIR, OUTPUT_DIR, REPORTS_DIR, TRACES_DIR, DATASET_DIR, HF_CACHE_DIR, PYLIBS_DIR):
    try:
        os.makedirs(_d, exist_ok=True)
    except OSError as _err:
        print(f"[paths] could not create directory {_d!r}: {_err}", file=sys.stderr)
if PERSISTENT:
    # Point the Hugging Face cache at the bucket so GGUF files fetched with
    # hf_hub_download persist; an HF_HOME that is already set in the
    # environment is left untouched.
    os.environ.setdefault("HF_HOME", HF_CACHE_DIR)

    # Make the bucket-installed llama-cpp-python importable. The directory goes
    # at the END of sys.path on purpose: bucket copies of common deps (numpy
    # etc.) must never shadow the system site-packages the app was built against.
    if PYLIBS_DIR not in sys.path:
        sys.path.append(PYLIBS_DIR)
def storage_status() -> str:
    """Return a one-sentence, markdown-formatted description of the storage mode.

    Returns:
        A warning message when no `/data` bucket is in use, otherwise a
        confirmation that persistent storage is mounted.
    """
    if not PERSISTENT:
        return (
            "⚠️ No `/data` bucket detected — running on ephemeral container "
            "storage (everything re-downloads after a restart)."
        )
    return (
        "✅ Persistent storage bucket mounted at `/data` — data cache, "
        "model files, reports, traces and the llama.cpp runtime all "
        "survive restarts."
    )