File size: 4,813 Bytes
7ac6163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbff5e0
 
 
 
 
 
 
 
 
7ac6163
cbff5e0
 
7ac6163
cbff5e0
 
7ac6163
cbff5e0
 
 
 
 
7ac6163
cbff5e0
7ac6163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
"""
Input validation and sanitization for backend services.

Provides defensive validation layers for all external inputs
to ensure system security and data integrity.
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Optional


# YouTube video ID format: 11 alphanumeric characters plus _ and -
# This has been stable since 2006 but could theoretically change
VIDEO_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]{11}$')
# Expected ID length; validate_video_id compares against this before it
# applies VIDEO_ID_PATTERN.
VIDEO_ID_LENGTH = 11

# File suffixes (lowercase, leading dot included) that validate_audio_path
# accepts; it lowercases the path's suffix before the membership test.
ALLOWED_AUDIO_EXTENSIONS = {'.mp3', '.wav', '.flac', '.ogg', '.m4a', '.webm', '.opus'}


def validate_video_id(video_id: str) -> bool:
    """
    Check whether a value looks like a YouTube video ID.

    A well-formed ID is a non-empty string of exactly ``VIDEO_ID_LENGTH``
    characters that matches ``VIDEO_ID_PATTERN`` (ASCII letters, digits,
    underscore and hyphen).

    Args:
        video_id: Candidate identifier

    Returns:
        True when the value has the expected format, False otherwise
    """
    is_nonempty_str = bool(video_id) and isinstance(video_id, str)
    if not is_nonempty_str:
        return False

    # Cheap length gate first; the regex then confirms the character set.
    has_expected_length = len(video_id) == VIDEO_ID_LENGTH
    return has_expected_length and VIDEO_ID_PATTERN.match(video_id) is not None


def validate_url(url: str) -> bool:
    """
    Validate URL format and allowed domains.

    A URL is accepted only when, after surrounding whitespace is stripped,
    it starts with ``http://`` or ``https://``, is at most 2048 characters
    long, contains no rejected character, and its hostname is one of the
    allow-listed YouTube/Spotify hosts.

    Note that the check is performed on the stripped value; callers should
    strip the URL as well before using it.

    Args:
        url: URL string to validate

    Returns:
        True if valid and safe, False otherwise
    """
    from urllib.parse import urlparse

    if not url or not isinstance(url, str):
        return False

    url = url.strip()

    if not url.startswith(('http://', 'https://')):
        return False

    if len(url) > 2048:
        return False

    # Markup/template metacharacters, plus the backslash: urlparse keeps "\"
    # inside the netloc, whereas browsers treat it like "/". Without this,
    # "https://evil.example\@youtube.com/" would pass the host check below
    # while a browser-style client would contact evil.example.
    dangerous_chars = ('<', '>', '"', "'", '`', '{', '}', '\\')
    if any(char in url for char in dangerous_chars):
        return False

    # ASCII control characters are never valid in a URL. Recent urlparse
    # versions silently drop tab/CR/LF, so the host that gets validated
    # would not be the host contained in the raw string.
    if any(ord(char) < 0x20 or ord(char) == 0x7f for char in url):
        return False

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # Raised for malformed authorities such as unbalanced "[" / "]".
        return False

    allowed_hosts = {
        'youtube.com',
        'www.youtube.com',
        'm.youtube.com',
        'music.youtube.com',
        'youtu.be',
        'www.youtu.be',
        'spotify.com',
        'www.spotify.com',
        'open.spotify.com',
    }

    return host in allowed_hosts


def validate_audio_path(path: Path) -> tuple[bool, Optional[str]]:
    """
    Validate audio file path for existence, format and size.

    Args:
        path: File path to validate

    Returns:
        Tuple of (is_valid, error_message). ``error_message`` is None on
        success, otherwise one of "file_not_found", "not_a_file",
        "invalid_path", "unsupported_format_<suffix>", "file_too_small",
        "file_too_large" or "cannot_read_file".
    """
    min_size_bytes = 1024                 # 1 KiB
    max_size_bytes = 100 * 1024 * 1024    # 100 MiB

    if not path.exists():
        return False, "file_not_found"

    if not path.is_file():
        return False, "not_a_file"

    try:
        # Strict resolution only confirms that every component (symlink
        # targets included) can be resolved. It does NOT confine the path
        # to any directory: callers that need traversal protection must
        # compare the resolved path against their own allowed root.
        path.resolve(strict=True)
    except (OSError, RuntimeError):
        return False, "invalid_path"

    extension = path.suffix.lower()
    if extension not in ALLOWED_AUDIO_EXTENSIONS:
        return False, f"unsupported_format_{extension}"

    try:
        file_size = path.stat().st_size
    except OSError:
        return False, "cannot_read_file"

    if file_size < min_size_bytes:
        return False, "file_too_small"

    if file_size > max_size_bytes:
        return False, "file_too_large"

    return True, None


def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to prevent directory traversal and injection.

    Surrounding whitespace is stripped; ``..``, path separators, ASCII
    control characters and the characters ``< > : " | ? *`` are each
    replaced with ``_``. Names longer than 255 characters are shortened to
    at most 200 characters of stem plus at most 55 characters of extension.

    Args:
        filename: Raw filename from user input

    Returns:
        Sanitized filename safe for use; "unnamed" when nothing usable
        remains
    """
    if not filename:
        return "unnamed"

    filename = filename.strip()

    # Neutralise parent-directory markers and both separator styles.
    for pattern in ('..', '/', '\\'):
        filename = filename.replace(pattern, '_')

    # Reserved punctuation plus every ASCII control character (NUL, tab,
    # CR, LF, ESC, DEL, ...), not just the three the name is most often
    # attacked with.
    filename = re.sub(r'[<>:"|?*\x00-\x1f\x7f]', '_', filename)

    if len(filename) > 255:
        # Truncate stem and extension separately. Slicing the raw name
        # would let a long extension overlap the first 200 characters and
        # then be appended a second time.
        suffix = Path(filename).suffix
        if suffix and filename.endswith(suffix):
            stem = filename[:-len(suffix)]
        else:
            stem = filename
        filename = stem[:200] + suffix[:55]

    if not filename or filename in {'.', '..'}:
        filename = "unnamed"

    return filename


def validate_threshold(value: float) -> bool:
    """
    Validate threshold value is in acceptable range.

    Args:
        value: Threshold value to validate

    Returns:
        True if ``value`` is an int or float within [0.0, 1.0] inclusive,
        False otherwise (including for booleans and NaN)
    """
    # bool is a subclass of int, so True/False would otherwise be accepted
    # as the thresholds 1 and 0.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False

    # NaN fails both comparisons and is therefore rejected here.
    return 0.0 <= value <= 1.0


def validate_timeout(seconds: float) -> bool:
    """
    Validate timeout value is reasonable.

    Args:
        seconds: Timeout value in seconds

    Returns:
        True if ``seconds`` is an int or float within [1.0, 300.0]
        inclusive, False otherwise (including for booleans, NaN and
        infinity)
    """
    # bool is a subclass of int, so True would otherwise be accepted as a
    # one-second timeout.
    if isinstance(seconds, bool) or not isinstance(seconds, (int, float)):
        return False

    # NaN and +/-inf fall outside the closed range and are rejected here.
    return 1.0 <= seconds <= 300.0