Spaces:
Starting
Starting
| """ | |
| src/storage/paths.py — Path normalisation and ID utilities (A2.3). | |
| Copied from src/functions.py — do NOT delete the originals (backward compat). | |
| """ | |
| import os | |
| import time | |
| import uuid | |
| import hashlib | |
# Maximum number of characters kept from a folder path or an agent ID
# (REQ-066). Inputs longer than this are truncated to the cap by slicing,
# not rejected.
_MAX_PATH_LEN = 512
def generate_id(prefix: str) -> str:
    """Build a unique ID made of a prefix, a timestamp and a random suffix.

    Format: ``{prefix}_{base36_timestamp}_{12_hex_chars}``

    The timestamp is the current wall-clock time in milliseconds, encoded
    in lowercase base 36; the suffix is the first 12 hex digits of a fresh
    UUID4.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    remaining = int(time.time() * 1000)

    # Peel off base-36 digits least-significant first, then reverse once.
    digits = []
    while remaining > 0:
        remaining, index = divmod(remaining, 36)
        digits.append(alphabet[index])

    # A zero timestamp produces no digits at all; represent it as "0".
    encoded = "".join(reversed(digits)) or "0"

    suffix = uuid.uuid4().hex[:12]
    return f"{prefix}_{encoded}_{suffix}"
def fingerprint_id(prefix: str, content: str) -> str:
    """Derive a deterministic ID from the SHA-256 digest of *content*.

    Format: ``{prefix}_{first_16_hex_chars_of_sha256}``

    *content* is stripped of surrounding whitespace and lowercased before
    hashing, so inputs that differ only in case or outer whitespace map to
    the same ID.
    """
    canonical = content.strip().lower()
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    short_digest = digest[:16]
    return f"{prefix}_{short_digest}"
def normalize_folder_path(path: str) -> str:
    """Normalize a folder path for safe use in KV scope keys.

    Steps applied in order:

    1. Cap the raw input at ``_MAX_PATH_LEN`` characters (REQ-066).
    2. Convert backslashes to forward slashes so both separator styles are
       handled the same way on every platform.
    3. Reject the path if any segment is exactly ``..`` (REQ-064).
    4. Apply ``os.path.normpath`` to collapse duplicate separators and
       ``.`` segments, then convert separators to forward slashes again
       (on Windows ``normpath`` emits backslashes).
    5. Strip leading and trailing slashes.

    Raises:
        ValueError: if *path* is empty, if stripping the slashes leaves an
            empty string (e.g. ``"/"``), or if a ``..`` segment is present
            before or after normalization (REQ-064).

    Returns:
        A non-empty, forward-slash-separated string with no leading or
        trailing slashes and no ``..`` segments.

    Property (REQ-074): idempotent — applying this function twice yields
    the same result as applying it once.

    NOTE: a path made only of ``.`` segments (``"."``, ``"./"``) normalizes
    to ``"."`` and is returned as such; it is not treated as empty.
    """
    if not path:
        raise ValueError("folder_path must not be empty")

    # 1. Length cap before any processing.
    path = path[:_MAX_PATH_LEN]

    # 2. Unify separators *before* normpath. On POSIX, normpath treats "\"
    #    as an ordinary character, so converting afterwards would leave
    #    "a\\\\b" as "a//b" and "a\\.\\b" as "a/./b" — results that are not
    #    normalized and change again on a second pass.
    unified = path.replace("\\", "/")

    # 3. Traversal check on the raw segments, before normpath has a chance
    #    to resolve (and thereby hide) a ".." component.
    if ".." in unified.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    # 4. OS-level normalisation; re-unify because Windows returns "\".
    normalized = os.path.normpath(unified).replace("\\", "/")

    # 5. Strip leading / trailing slashes.
    normalized = normalized.strip("/")

    # Defence in depth: reject any ".." that somehow survives normalisation.
    if ".." in normalized.split("/"):
        raise ValueError(
            f"folder_path contains path traversal segment '..': {path!r}"
        )

    if not normalized:
        raise ValueError("folder_path is empty after normalization")

    return normalized
def validate_agent_id(agent_id: str) -> str:
    """Validate and sanitize an agent_id before use in KV scope keys.

    Strips surrounding whitespace and caps the result at ``_MAX_PATH_LEN``
    characters (REQ-066). Whitespace exposed at the end by the cap is
    stripped as well, so the returned value never has surrounding
    whitespace and validating it again returns it unchanged.

    Raises:
        ValueError: if *agent_id* is empty, or empty after stripping.

    Returns:
        Sanitized agent_id string.
    """
    if not agent_id:
        raise ValueError("agent_id must not be empty")

    # Strip first so the cap is spent on meaningful characters. The cut can
    # land right after interior whitespace (e.g. "aaa… b"), which would
    # leave a trailing space, hence the second rstrip().
    sanitized = agent_id.strip()[:_MAX_PATH_LEN].rstrip()

    if not sanitized:
        raise ValueError("agent_id is empty after stripping whitespace")

    return sanitized