DocumentVault / server /utils /validators.py
mohsin-devs's picture
Deploy HF-ready DocVault with HF storage backend
2fe2727
"""Utility functions for path validation and security."""
import os
import re
class PathValidator:
    """Validates and sanitizes user-provided file and folder paths.

    Every method is static; the class only serves as a namespace. Paths are
    treated as relative, ``/``-separated strings whose components must each
    pass :meth:`is_valid_filename`.
    """

    # Longest filename (after trimming surrounding whitespace) that is accepted.
    _MAX_FILENAME_LENGTH = 250

    # Characters that may never appear inside a single path component.
    _INVALID_CHARS = frozenset('<>:"/\\|?*')

    # Device names that Windows reserves regardless of the file extension.
    _RESERVED_NAMES = frozenset(
        {"CON", "PRN", "AUX", "NUL"}
        | {f"COM{n}" for n in range(1, 10)}
        | {f"LPT{n}" for n in range(1, 10)}
    )

    @staticmethod
    def _normalize_relative_path(path: str) -> str:
        """Collapse separators and trim the ends of a relative path.

        Runs of ``/`` and ``\\`` become a single ``/``; leading and trailing
        slashes and spaces are removed. Non-string input yields ``""``.
        """
        if not isinstance(path, str):
            return ""
        normalized = re.sub(r"[\\/]+", "/", path).strip("/ ")
        return normalized

    @staticmethod
    def is_valid_filename(filename: str) -> bool:
        """Return ``True`` if *filename* is acceptable as one path component.

        A name is rejected when it is empty or not a string, contains a
        control character (code point below 32) anywhere, is longer than 250
        characters after trimming whitespace, is ``"."`` or contains ``".."``,
        contains any of ``<>:"/\\|?*``, or has a reserved device name such as
        ``CON`` or ``LPT1`` before its first dot (case-insensitive).
        """
        if not filename or not isinstance(filename, str):
            return False

        # Check control characters on the raw value: str.strip() would
        # otherwise silently drop a leading/trailing "\n" or "\t" and let a
        # name such as "report\n" through.
        if any(ord(char) < 32 for char in filename):
            return False

        filename = filename.strip()
        if not filename or len(filename) > PathValidator._MAX_FILENAME_LENGTH:
            return False

        # "." and ".." are directory references, not real names; any embedded
        # ".." is refused as well to rule out traversal tricks.
        if filename == "." or ".." in filename:
            return False

        if any(char in PathValidator._INVALID_CHARS for char in filename):
            return False

        stem = filename.split(".", 1)[0].upper()
        return stem not in PathValidator._RESERVED_NAMES

    @staticmethod
    def is_valid_path(path: str) -> bool:
        """Return ``True`` if every component of *path* is a valid filename.

        Falsy input (``None``, ``""``) and paths that normalize to the empty
        string denote the root and are valid. Any other non-string value is
        rejected rather than being mistaken for the root.
        """
        if not path:
            return True
        if not isinstance(path, str):
            return False

        normalized = PathValidator._normalize_relative_path(path)
        if normalized == "":
            return True
        return all(
            PathValidator.is_valid_filename(part) for part in normalized.split("/")
        )

    @staticmethod
    def validate_folder_structure(path_parts: list[str]) -> bool:
        """Return ``True`` if all non-empty entries of *path_parts* are valid names.

        Empty entries are skipped instead of being treated as invalid.
        """
        return all(
            PathValidator.is_valid_filename(part) for part in path_parts if part
        )
def sanitize_filename(filename: str) -> str:
    """Return a cleaned-up version of *filename*.

    Each of ``<>:"/\\|?*`` and every control character (``\\x00``-``\\x1F``)
    is replaced by an underscore, then leading and trailing dots and spaces
    are removed. If the input is empty, or nothing is left after cleaning,
    the placeholder ``"file"`` is returned.
    """
    fallback = "file"
    if filename:
        cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", filename).strip(". ")
        if cleaned:
            return cleaned
    return fallback
def format_file_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the ``TB`` unit
    is reached, and is then printed with two decimals followed by the unit
    (``B``, ``KB``, ``MB``, ``GB`` or ``TB``). A size of exactly zero is
    rendered as ``"0 B"``.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    last = len(units) - 1
    value = float(size_bytes)
    position = 0
    # Step up one unit at a time; TB is the ceiling and is never scaled further.
    while position < last and not (value < 1024.0):
        value /= 1024.0
        position += 1
    return f"{value:.2f} {units[position]}"
def get_file_extension(filename: str) -> str:
    """Return the lower-cased extension of *filename* without the leading dot.

    Only the last suffix is considered, as determined by ``os.path.splitext``;
    a name without an extension yields the empty string.
    """
    _, suffix = os.path.splitext(filename)
    return suffix.lstrip(".").lower()