crowncode-backend / app /services /validation.py
Rthur2003's picture
fix: enhance CORS origin handling and update validation for YouTube URLs
cbff5e0
"""
Input validation and sanitization for backend services.
Provides defensive validation layers for all external inputs
to ensure system security and data integrity.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Optional
# A YouTube video ID is exactly 11 characters taken from ASCII letters,
# digits, underscore and hyphen. The format has been stable since 2006,
# but nothing guarantees it will stay that way.
VIDEO_ID_LENGTH = 11
VIDEO_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]{11}$')

# Lower-case file suffixes (leading dot included) accepted as audio input.
ALLOWED_AUDIO_EXTENSIONS = {
    '.flac',
    '.m4a',
    '.mp3',
    '.ogg',
    '.opus',
    '.wav',
    '.webm',
}
def validate_video_id(video_id: str) -> bool:
    """
    Check whether a value looks like a YouTube video ID.

    A well-formed ID is a string of exactly ``VIDEO_ID_LENGTH`` characters
    that matches ``VIDEO_ID_PATTERN`` (letters, digits, ``_`` and ``-``).

    Args:
        video_id: Candidate video identifier

    Returns:
        True when the value is a correctly shaped ID, False otherwise
    """
    # Empty, None and non-string values are never valid IDs.
    if not (video_id and isinstance(video_id, str)):
        return False

    # Cheap length test first; the regex only runs on plausible candidates.
    has_expected_length = len(video_id) == VIDEO_ID_LENGTH
    return has_expected_length and VIDEO_ID_PATTERN.match(video_id) is not None
# Hosts the backend accepts. The parsed hostname is lower-cased and compared
# for exact membership, so look-alike or nested domains are rejected.
_ALLOWED_URL_HOSTS = frozenset({
    'youtube.com',
    'www.youtube.com',
    'm.youtube.com',
    'music.youtube.com',
    'youtu.be',
    'www.youtu.be',
    'spotify.com',
    'www.spotify.com',
    'open.spotify.com',
})

# Characters that must never appear raw inside an accepted URL:
#   * whitespace / control characters - URL parsers may silently strip them,
#     so the host checked here could differ from what a consumer of the raw
#     string sees (and CR/LF enable header or log injection);
#   * backslash - some URL parsers treat it like "/", while urlparse keeps it
#     in the authority, so "https://evil\@youtube.com" would parse as
#     youtube.com here but may resolve elsewhere in another parser;
#   * quoting / markup characters that are unsafe to embed in HTML or shells.
_FORBIDDEN_URL_CHARS = re.compile(r'[\s\x00-\x1f\x7f\\<>"\'`{}]')

_MAX_URL_LENGTH = 2048


def validate_url(url: str) -> bool:
    """
    Validate URL format and allowed domains.

    Leading and trailing whitespace is ignored for the purpose of the check;
    callers should strip the value themselves before using it. After
    stripping, the URL must start with ``http://`` or ``https://``, be at
    most 2048 characters long, contain no whitespace, control characters,
    backslashes or quoting/markup characters, and have a hostname that is
    exactly one of the allowed YouTube / Spotify hosts.

    Args:
        url: URL string to validate

    Returns:
        True if valid and safe, False otherwise
    """
    if not url or not isinstance(url, str):
        return False

    url = url.strip()
    if not url.startswith(('http://', 'https://')):
        return False
    if len(url) > _MAX_URL_LENGTH:
        return False
    if _FORBIDDEN_URL_CHARS.search(url):
        return False

    from urllib.parse import urlparse

    try:
        # urlparse raises ValueError for malformed authorities
        # (for example an unterminated IPv6 literal).
        host = (urlparse(url).hostname or '').lower()
    except ValueError:
        return False

    return host in _ALLOWED_URL_HOSTS
def validate_audio_path(path: Path) -> tuple[bool, Optional[str]]:
    """
    Validate that a path points to a usable audio file.

    Checks run in this order, and the first failure determines the error
    code:

    * ``file_not_found``  - the path does not exist
    * ``not_a_file``      - the path exists but is not a regular file
    * ``invalid_path``    - the path cannot be inspected or resolved
      (over-long name, permission error, symlink loop, ...)
    * ``unsupported_format_<ext>`` - suffix not in ALLOWED_AUDIO_EXTENSIONS
    * ``file_too_small``  - smaller than 1 KiB
    * ``file_too_large``  - larger than 100 MiB
    * ``cannot_read_file`` - the size could not be read

    Note that this function does NOT confine the path to any base
    directory; callers that accept user-controlled paths must enforce that
    separately.

    Args:
        path: File path to validate

    Returns:
        Tuple of (is_valid, error_message); error_message is None on success
    """
    try:
        # exists() / is_file() only swallow "not found"-style errors on some
        # Python versions; anything else (ENAMETOOLONG, EACCES, ...) would
        # propagate, so keep them inside the try block.
        if not path.exists():
            return False, "file_not_found"
        if not path.is_file():
            return False, "not_a_file"
        # strict=True only confirms the path resolves to something real
        # (no dangling links or symlink loops); the result is not needed.
        path.resolve(strict=True)
    except (OSError, RuntimeError):
        return False, "invalid_path"

    extension = path.suffix.lower()
    if extension not in ALLOWED_AUDIO_EXTENSIONS:
        return False, f"unsupported_format_{extension}"

    try:
        file_size = path.stat().st_size
    except OSError:
        return False, "cannot_read_file"

    if file_size < 1024:
        return False, "file_too_small"
    if file_size > 100 * 1024 * 1024:
        return False, "file_too_large"
    return True, None
# Path separators, every ASCII control character (NUL, CR, LF, TAB, ESC,
# DEL, ...) and the characters Windows forbids in file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\x00-\x1f\x7f/\\<>:"|?*]')


def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to prevent directory traversal and injection.

    Surrounding whitespace is stripped, every ".." sequence and every
    unsafe character (path separators, control characters, ``<>:"|?*``) is
    replaced with "_", and names longer than 255 characters are shortened
    to the first 200 characters plus up to 55 characters of the extension.
    Empty results and "." fall back to "unnamed".

    Args:
        filename: Raw filename from user input

    Returns:
        Sanitized filename safe for use
    """
    if not filename:
        return "unnamed"

    # Collapse ".." first so the single-character pass cannot recreate it.
    cleaned = filename.strip().replace('..', '_')
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', cleaned)

    if len(cleaned) > 255:
        cleaned = cleaned[:200] + Path(cleaned).suffix[:55]
        # Joining a prefix that ends in "." with the suffix's leading "."
        # would reintroduce "..", so collapse it again.
        cleaned = cleaned.replace('..', '_')

    if not cleaned or cleaned in {'.', '..'}:
        return "unnamed"
    return cleaned
def validate_threshold(value: float) -> bool:
    """
    Validate threshold value is in acceptable range.

    Accepts real numbers (int or float) in the closed interval [0.0, 1.0].
    Booleans are rejected even though ``bool`` is a subclass of ``int``:
    a ``True``/``False`` arriving from external input is not a threshold.
    NaN is rejected because it fails the range comparison.

    Args:
        value: Threshold value to validate

    Returns:
        True if valid, False otherwise
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    return 0.0 <= value <= 1.0
def validate_timeout(seconds: float) -> bool:
    """
    Validate timeout value is reasonable.

    Accepts real numbers (int or float) in the closed interval
    [1.0, 300.0] seconds. Booleans are rejected even though ``bool`` is a
    subclass of ``int`` (otherwise ``True`` would pass as a 1-second
    timeout). NaN and infinity fail the range comparison.

    Args:
        seconds: Timeout value in seconds

    Returns:
        True if valid, False otherwise
    """
    if isinstance(seconds, bool) or not isinstance(seconds, (int, float)):
        return False
    return 1.0 <= seconds <= 300.0