"""
Input validation and sanitization for backend services.

Provides defensive validation layers for all external inputs
to ensure system security and data integrity.
"""
from __future__ import annotations

import re
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse
# YouTube video IDs are exactly 11 characters drawn from ASCII letters,
# digits, "_" and "-". This has been stable since 2006 but could
# theoretically change, so the length is defined once and reused below.
VIDEO_ID_LENGTH = 11
# Anchored with \Z rather than $: "$" also matches just before a trailing
# newline, which would let "<valid id>\n" through whenever the pattern is
# used without a separate length check.
VIDEO_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]{%d}\Z' % VIDEO_ID_LENGTH)

# File suffixes (lower-case, including the leading dot) accepted as audio.
ALLOWED_AUDIO_EXTENSIONS = {'.mp3', '.wav', '.flac', '.ogg', '.m4a', '.webm', '.opus'}
def validate_video_id(video_id: str) -> bool:
    """
    Check whether a value has the shape of a YouTube video ID.

    A value is accepted only when it is a non-empty string whose length
    equals VIDEO_ID_LENGTH and which matches VIDEO_ID_PATTERN
    (alphanumeric characters, underscores and hyphens).

    Args:
        video_id: Video identifier to validate

    Returns:
        True if valid format, False otherwise
    """
    # Empty / falsy values are rejected before any type inspection.
    if not video_id:
        return False
    if not isinstance(video_id, str):
        return False

    # Cheap length gate first; the regex only runs on right-sized strings.
    if len(video_id) != VIDEO_ID_LENGTH:
        return False

    return VIDEO_ID_PATTERN.match(video_id) is not None
# Hosts accepted by validate_url; compared against the lower-cased hostname.
_ALLOWED_URL_HOSTS = frozenset({
    'youtube.com',
    'www.youtube.com',
    'm.youtube.com',
    'music.youtube.com',
    'youtu.be',
    'www.youtu.be',
    'spotify.com',
    'www.spotify.com',
    'open.spotify.com',
})

# Characters rejected anywhere in a URL. The backslash is included because
# some URL consumers treat it like "/", so "https://evil\@youtube.com/" can
# resolve to a different host than the one urlparse reports.
_FORBIDDEN_URL_CHARS = frozenset('<>"\'`{}\\')

_MAX_URL_LENGTH = 2048


def validate_url(url: str) -> bool:
    """
    Validate URL format and allowed domains.

    The URL must, after surrounding whitespace is stripped:
      * start with "http://" or "https://" (lower-case),
      * be at most 2048 characters long,
      * contain no whitespace, control/non-printable characters, or any of
        the characters < > " ' ` { } and backslash,
      * have a hostname that is exactly one of the allowed YouTube/Spotify
        hosts (case-insensitive).

    Note that only a bool is returned: callers that accept a URL with
    surrounding whitespace should strip it themselves before using it.

    Args:
        url: URL string to validate

    Returns:
        True if valid and safe, False otherwise
    """
    if not url or not isinstance(url, str):
        return False

    url = url.strip()
    if not url.startswith(('http://', 'https://')):
        return False
    if len(url) > _MAX_URL_LENGTH:
        return False

    # urlparse silently drops tab/CR/LF before parsing, so a URL such as
    # "https://www.you\ttube.com/" would otherwise pass the host check while
    # the raw string still carries the control character. Reject them here.
    if any(
        ch in _FORBIDDEN_URL_CHARS or ch.isspace() or not ch.isprintable()
        for ch in url
    ):
        return False

    try:
        host = (urlparse(url).hostname or '').lower()
    except ValueError:
        # Raised for malformed authorities, e.g. an unterminated IPv6 literal.
        return False

    return host in _ALLOWED_URL_HOSTS
def validate_audio_path(path: Path) -> tuple[bool, Optional[str]]:
    """
    Validate that a path points at an existing, plausibly sized audio file.

    Checks are applied in this order and the first failure is reported:
    existence, regular-file check, strict resolution, file extension
    (against ALLOWED_AUDIO_EXTENSIONS, case-insensitive) and file size
    (at least 1 KiB, at most 100 MiB).

    This function does NOT confine the path to any directory. Resolving a
    path only confirms that it can be resolved; callers that need
    containment must compare the resolved path against their own base
    directory.

    Args:
        path: File path to validate. A str or other os.PathLike is also
            accepted and converted to a Path.

    Returns:
        Tuple of (is_valid, error_message). error_message is None on
        success, otherwise one of "file_not_found", "not_a_file",
        "invalid_path", "unsupported_format_<ext>", "file_too_small",
        "file_too_large" or "cannot_read_file".
    """
    min_bytes = 1024
    max_bytes = 100 * 1024 * 1024

    try:
        candidate = Path(path)
    except TypeError:
        # None, numbers, etc. are reported instead of raising.
        return False, "invalid_path"

    try:
        if not candidate.exists():
            return False, "file_not_found"
        if not candidate.is_file():
            return False, "not_a_file"
        # Strict resolution fails if the file vanished in the meantime or
        # the path cannot be resolved (e.g. a symlink loop).
        candidate.resolve(strict=True)
    except (OSError, RuntimeError):
        return False, "invalid_path"

    extension = candidate.suffix.lower()
    if extension not in ALLOWED_AUDIO_EXTENSIONS:
        return False, f"unsupported_format_{extension}"

    try:
        file_size = candidate.stat().st_size
    except OSError:
        return False, "cannot_read_file"

    if file_size < min_bytes:
        return False, "file_too_small"
    if file_size > max_bytes:
        return False, "file_too_large"
    return True, None
# One pass over the name: ".." traversal sequences, path separators, every
# C0 control character plus DEL, and the characters < > : " | ? *.
_UNSAFE_FILENAME_RE = re.compile(r'\.\.|[/\\\x00-\x1f\x7f<>:"|?*]')


def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to prevent directory traversal and injection.

    Surrounding whitespace is stripped, then every ".." sequence, path
    separator, control character and each of < > : " | ? * is replaced by
    an underscore. Names longer than 255 characters are shortened to at
    most 200 characters of stem plus at most 55 characters of suffix.

    Args:
        filename: Raw filename from user input

    Returns:
        Sanitized filename safe for use; "unnamed" when the input is empty,
        not a string, or reduces to nothing usable.
    """
    if not filename or not isinstance(filename, str):
        return "unnamed"

    cleaned = _UNSAFE_FILENAME_RE.sub('_', filename.strip())

    if len(cleaned) > 255:
        # Split before truncating so a long suffix is not appended on top of
        # a prefix that already contains part of it.
        as_path = Path(cleaned)
        cleaned = as_path.stem[:200] + as_path.suffix[:55]

    # ".." cannot survive the substitution above; a lone "." still can.
    if not cleaned or cleaned == '.':
        return "unnamed"
    return cleaned
def validate_threshold(value: float) -> bool:
    """
    Validate threshold value is in acceptable range.

    Args:
        value: Threshold value to validate

    Returns:
        True for an int or float in the closed range [0.0, 1.0]; False for
        anything else, including NaN, booleans and non-numeric types.
    """
    # bool is a subclass of int, so it must be excluded explicitly; a
    # True/False here is a caller mistake rather than a real threshold.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # NaN fails both comparisons and is therefore rejected as well.
    return 0.0 <= value <= 1.0
def validate_timeout(seconds: float) -> bool:
    """
    Validate timeout value is reasonable.

    Args:
        seconds: Timeout value in seconds

    Returns:
        True for an int or float in the closed range [1.0, 300.0]; False
        for anything else, including NaN, infinity, booleans and
        non-numeric types.
    """
    # bool is a subclass of int; without this check True would be accepted
    # as a one-second timeout.
    if isinstance(seconds, bool) or not isinstance(seconds, (int, float)):
        return False
    # NaN and +/-infinity fall outside the range and are rejected here.
    return 1.0 <= seconds <= 300.0