Spaces:
Sleeping
Sleeping
File size: 4,813 Bytes
"""
Input validation and sanitization for backend services.
Provides defensive validation layers for all external inputs
to ensure system security and data integrity.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Optional
# A YouTube video ID is exactly 11 characters drawn from ASCII letters,
# digits, '_' and '-'. The format has been stable since 2006, but nothing
# guarantees it will never change.
VIDEO_ID_LENGTH = 11
VIDEO_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]{11}$')

# Lower-case file suffixes (including the leading dot) accepted as audio.
ALLOWED_AUDIO_EXTENSIONS = {
    '.flac',
    '.m4a',
    '.mp3',
    '.ogg',
    '.opus',
    '.wav',
    '.webm',
}
def validate_video_id(video_id: str) -> bool:
    """
    Check whether a value looks like a YouTube video ID.

    A well-formed ID is a string of exactly ``VIDEO_ID_LENGTH`` characters
    that matches ``VIDEO_ID_PATTERN`` (letters, digits, underscore, hyphen).

    Args:
        video_id: Candidate identifier.

    Returns:
        True when the value is a correctly formatted ID, False otherwise
        (including empty values and non-string inputs).
    """
    # Empty values and non-strings are rejected before any length/regex work.
    is_nonempty_text = bool(video_id) and isinstance(video_id, str)
    if not is_nonempty_text:
        return False

    has_expected_length = len(video_id) == VIDEO_ID_LENGTH
    return has_expected_length and VIDEO_ID_PATTERN.match(video_id) is not None
# Hosts (exact, lower-case match) that a URL is allowed to point at.
_ALLOWED_URL_HOSTS = frozenset({
    'youtube.com',
    'www.youtube.com',
    'm.youtube.com',
    'music.youtube.com',
    'youtu.be',
    'www.youtu.be',
    'spotify.com',
    'www.spotify.com',
    'open.spotify.com',
})

# Characters rejected anywhere in a URL: markup/quoting characters, plus the
# backslash, which some URL parsers treat as '/' and others do not.
_FORBIDDEN_URL_CHARS = frozenset('<>"\'`{}\\')

_MAX_URL_LENGTH = 2048


def validate_url(url: str) -> bool:
    """
    Validate URL format and allowed domains.

    A URL is accepted only if, after stripping surrounding whitespace, it:

    * starts with ``http://`` or ``https://`` (case-sensitive),
    * is at most 2048 characters long,
    * contains no markup/quoting characters, backslashes, whitespace,
      control characters or other non-printable characters, and
    * has a hostname that exactly matches one of the allowed hosts.

    Args:
        url: URL string to validate

    Returns:
        True if valid and safe, False otherwise
    """
    from urllib.parse import urlparse

    if not url or not isinstance(url, str):
        return False

    url = url.strip()
    if not url.startswith(('http://', 'https://')):
        return False
    if len(url) > _MAX_URL_LENGTH:
        return False

    # The raw string is what callers go on to use, so it must not contain
    # anything the parser below would silently drop or read differently from
    # other URL consumers: urlparse removes tab/CR/LF before splitting, and a
    # backslash before '@' makes parsers disagree about which part is the host.
    for char in url:
        if char in _FORBIDDEN_URL_CHARS:
            return False
        if char.isspace() or not char.isprintable():
            return False

    try:
        host = (urlparse(url).hostname or '').lower()
    except ValueError:
        # urlparse signals malformed authority sections (e.g. an unbalanced
        # '[' in the host) with ValueError.
        return False

    return host in _ALLOWED_URL_HOSTS
# Accepted file-size window for audio files, in bytes (1 KiB .. 100 MiB).
_MIN_AUDIO_BYTES = 1024
_MAX_AUDIO_BYTES = 100 * 1024 * 1024


def validate_audio_path(path: Path) -> tuple[bool, Optional[str]]:
    """
    Validate that a path points at an existing, supported audio file.

    Checks, in order: the path exists, it is a regular file, it can be
    resolved, its suffix is in ``ALLOWED_AUDIO_EXTENSIONS`` (compared
    case-insensitively), and its size is between 1 KiB and 100 MiB.

    This function does NOT confine the path to any directory; callers that
    need traversal protection must compare the resolved path against their
    own allowed root.

    Args:
        path: File path to validate. ``str`` and other path-like values are
            accepted and converted to ``Path``.

    Returns:
        Tuple of (is_valid, error_message). ``error_message`` is None on
        success, otherwise one of ``"file_not_found"``, ``"not_a_file"``,
        ``"invalid_path"``, ``"unsupported_format_<suffix>"``,
        ``"file_too_small"``, ``"file_too_large"`` or ``"cannot_read_file"``.
    """
    try:
        path = Path(path)
    except TypeError:
        # Not a str / path-like object at all (e.g. None).
        return False, "invalid_path"

    # exists()/is_file() swallow "missing file" errors but can propagate other
    # OSErrors (permission denied, name too long), so they are guarded here
    # to keep the tuple-returning contract.
    try:
        if not path.exists():
            return False, "file_not_found"
        if not path.is_file():
            return False, "not_a_file"
        # strict=True surfaces symlink loops and files that vanished since
        # the checks above; the resolved value itself is not needed.
        path.resolve(strict=True)
    except (OSError, RuntimeError, ValueError):
        return False, "invalid_path"

    extension = path.suffix.lower()
    if extension not in ALLOWED_AUDIO_EXTENSIONS:
        return False, f"unsupported_format_{extension}"

    try:
        file_size = path.stat().st_size
    except OSError:
        return False, "cannot_read_file"

    if file_size < _MIN_AUDIO_BYTES:
        return False, "file_too_small"
    if file_size > _MAX_AUDIO_BYTES:
        return False, "file_too_large"
    return True, None
# Single characters that are never allowed in a sanitized filename: path
# separators, characters reserved on Windows, and every ASCII control
# character (NUL through US, plus DEL).
_UNSAFE_FILENAME_CHARS = re.compile(r'[/\\<>:"|?*\x00-\x1f\x7f]')


def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to prevent directory traversal and injection.

    Surrounding whitespace is stripped, every ``..`` sequence and every
    unsafe character (path separators, ``<>:"|?*`` and all ASCII control
    characters) is replaced with ``_``, and names longer than 255
    characters are shortened to the first 200 characters plus up to 55
    characters of the file suffix.

    Args:
        filename: Raw filename from user input

    Returns:
        Sanitized filename safe for use; ``"unnamed"`` when the input is
        empty or nothing usable remains.
    """
    if not filename:
        return "unnamed"

    filename = filename.strip()

    # Collapse parent-directory sequences before the per-character pass so
    # each '..' becomes a single '_'.
    filename = filename.replace('..', '_')
    filename = _UNSAFE_FILENAME_CHARS.sub('_', filename)

    if len(filename) > 255:
        # Keep the suffix so the file type survives truncation.
        filename = filename[:200] + Path(filename).suffix[:55]

    if not filename or filename in {'.', '..'}:
        return "unnamed"
    return filename
def validate_threshold(value: float) -> bool:
    """
    Validate threshold value is in acceptable range.

    Args:
        value: Threshold value to validate

    Returns:
        True if ``value`` is an int or float within [0.0, 1.0], False
        otherwise. Booleans and NaN are rejected.
    """
    # bool is a subclass of int, so it has to be excluded explicitly;
    # otherwise True/False would be accepted as 1/0.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # NaN fails both comparisons and is therefore rejected here.
    return 0.0 <= value <= 1.0
def validate_timeout(seconds: float) -> bool:
    """
    Validate timeout value is reasonable.

    Args:
        seconds: Timeout value in seconds

    Returns:
        True if ``seconds`` is an int or float within [1.0, 300.0], False
        otherwise. Booleans, NaN and infinities are rejected.
    """
    # bool is a subclass of int; without this guard True would be accepted
    # as a 1-second timeout.
    if isinstance(seconds, bool) or not isinstance(seconds, (int, float)):
        return False
    return 1.0 <= seconds <= 300.0
|