Spaces:

Rthur2003
/

crowncode-backend

Running

App Files Files Community

crowncode-backend / app /services /url_parser.py

Rthur2003

fix: enhance CORS origin handling and update validation for YouTube URLs

cbff5e0 3 months ago

raw

history blame contribute delete

2.75 kB

	"""
	YouTube URL parsing helpers with enhanced validation.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	import re
	from typing import Optional
	from urllib.parse import parse_qs, urlparse

	from .validation import validate_video_id, validate_url


	@dataclass(frozen=True)
	class ParsedYouTubeUrl:
	    """Immutable result of parsing a YouTube URL."""

	    # Video identifier extracted from the URL.
	    video_id: str
	    # Canonical URL form for the video.
	    normalized_url: str
	    # Start offset in seconds; None when no offset is known.
	    start_time_sec: Optional[int] = None


	def _parse_time_offset(raw: str) -> Optional[int]:
	if not raw:
	return None
	value = raw.strip().lower()
	if value.isdigit():
	return int(value)

	total = 0
	matches = re.findall(r"(\d+)(h\|m\|s)", value)
	if not matches:
	return None

	for amount, unit in matches:
	amount_int = int(amount)
	if unit == "h":
	total += amount_int * 3600
	elif unit == "m":
	total += amount_int * 60
	elif unit == "s":
	total += amount_int
	return total


	def _extract_video_id(parsed_url) -> Optional[str]:
	host = parsed_url.netloc.lower()
	path = parsed_url.path or ""
	query = parse_qs(parsed_url.query)

	if host in {"youtu.be", "www.youtu.be"}:
	candidate = path.strip("/").split("/")[0]
	return candidate or None

	if host in {"youtube.com", "www.youtube.com", "m.youtube.com", "music.youtube.com"}:
	if path == "/watch":
	return query.get("v", [None])[0]
	if path.startswith("/shorts/") or path.startswith("/live/") or path.startswith("/embed/"):
	parts = path.strip("/").split("/")
	return parts[1] if len(parts) > 1 else None

	return None


	def parse_youtube_url(url: str) -> ParsedYouTubeUrl:
	    """Validate a YouTube URL and reduce it to a canonical form.

	    Args:
	        url: URL text to parse; surrounding whitespace is ignored.

	    Returns:
	        A ``ParsedYouTubeUrl`` holding the video ID, a normalized
	        ``https://www.youtube.com/watch?v=<id>`` URL (with ``&t=<seconds>``
	        appended when a non-zero start offset was found) and the start
	        offset in seconds, or ``None`` when no offset could be read.

	    Raises:
	        ValueError: If the URL is empty, fails ``validate_url``, does not
	            use http/https, yields no video ID, or the ID fails
	            ``validate_video_id``.
	    """
	    # Strip once so the emptiness check, validation and parsing all look
	    # at exactly the same text.
	    cleaned = url.strip() if url else ""
	    if not cleaned:
	        raise ValueError("URL is empty.")

	    if not validate_url(cleaned):
	        raise ValueError("Invalid or unsafe URL format.")

	    parsed = urlparse(cleaned)
	    if parsed.scheme not in {"http", "https"}:
	        raise ValueError("URL must start with http:// or https://")

	    video_id = _extract_video_id(parsed)
	    if not video_id:
	        raise ValueError("Invalid or missing YouTube video ID.")

	    if not validate_video_id(video_id):
	        raise ValueError("Invalid video ID format.")

	    # The start offset may arrive under several parameter names; the first
	    # one present wins, in this order.
	    query = parse_qs(parsed.query)
	    start_raw = (
	        query.get("t", [None])[0]
	        or query.get("start", [None])[0]
	        or query.get("time_continue", [None])[0]
	    )
	    start_time_sec = _parse_time_offset(start_raw) if start_raw else None

	    normalized_url = f"https://www.youtube.com/watch?v={video_id}"
	    # A zero or missing offset adds nothing, so the URL stays bare.
	    if start_time_sec:
	        normalized_url = f"{normalized_url}&t={start_time_sec}"

	    return ParsedYouTubeUrl(
	        video_id=video_id,
	        normalized_url=normalized_url,
	        start_time_sec=start_time_sec,
	    )