| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | import { spawn, execSync } from 'child_process';
|
| | import fs from 'fs';
|
| | import path from 'path';
|
| | import https from 'https';
|
| | import http from 'http';
|
| | import os from 'os';
|
| | import type { ToolRegistry, ToolParams, ToolResult, ProgressEmitter } from '../toolRegistry';
|
| |
|
| | const OUTPUT_DIR = path.join(__dirname, '..', '..', 'output'); // Directory two levels above this module; created on demand by the tool and used for downloaded subtitle files and the generated `<videoId>-transcript.txt` files.
|
| |
|
/**
 * Registers `transcript_tool`, which extracts the transcript of a YouTube video.
 *
 * Execution resolves the video ID from the supplied URL, makes sure the output
 * directory exists and then tries three sources in order: yt-dlp, the Innertube
 * endpoint, and the caption tracks embedded in the watch page. The first source
 * that yields a non-empty transcript wins; if none does, an error is thrown.
 */
export function register(registry: ToolRegistry): void {
  // A result only counts when its transcript contains non-whitespace text.
  const hasTranscript = (candidate: ToolResult) =>
    candidate.transcript && (candidate.transcript as string).trim().length > 0;

  registry.register({
    name: 'transcript_tool',
    description: 'Extract transcript/subtitles from a YouTube video URL.',
    syntax: 'use <transcript_tool> <youtube-url>',
    pattern: /use\s+<transcript_tool>\s+(?<url>https?:\/\/(?:www\.)?(?:youtube\.com\/watch\?v=|youtu\.be\/)[\w-]+[^\s]*)/i,
    mock: false,

    async execute(params: ToolParams, emitProgress: ProgressEmitter): Promise<ToolResult> {
      const url = params.url || (params.captures && params.captures[0]);
      if (!url) {
        throw new Error('No URL provided.');
      }

      emitProgress('Extracting video ID...');
      const videoId = extractVideoId(url as string);
      if (!videoId) {
        throw new Error('Invalid YouTube URL.');
      }

      if (!fs.existsSync(OUTPUT_DIR)) {
        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
      }
      emitProgress(`Video ID: ${videoId}`);

      // Source 1: yt-dlp, when an installation can be located.
      const ytdlpPath = await findYtdlp(emitProgress);
      if (!ytdlpPath) {
        emitProgress('yt-dlp not found. Using YouTube API fallback...');
      } else {
        emitProgress(`yt-dlp found at: ${ytdlpPath}`);
        try {
          const viaYtdlp = await fetchWithYtdlp(ytdlpPath, url as string, videoId, emitProgress);
          if (hasTranscript(viaYtdlp)) {
            return viaYtdlp;
          }
          emitProgress('yt-dlp returned empty subtitles. Trying fallback...');
        } catch (err: any) {
          emitProgress(`yt-dlp failed: ${err.message}. Trying fallback...`);
        }
      }

      // Source 2: the Innertube transcript endpoint.
      try {
        const viaInnertube = await fetchWithInnertube(videoId, emitProgress);
        if (hasTranscript(viaInnertube)) {
          return viaInnertube;
        }
        emitProgress('Innertube returned empty. Trying page scrape...');
      } catch (err: any) {
        emitProgress(`Innertube failed: ${err.message}. Trying page scrape...`);
      }

      // Source 3: caption tracks scraped from the watch page.
      try {
        const viaPage = await fetchFromPage(videoId, emitProgress);
        if (hasTranscript(viaPage)) {
          return viaPage;
        }
      } catch (err: any) {
        emitProgress(`Page scrape failed: ${err.message}`);
      }

      throw new Error('Could not extract transcript. The video may not have captions, or YouTube blocked the request. Install yt-dlp for best results: pip install yt-dlp');
    },
  });
}
|
| |
|
/**
 * Extracts the 11-character video ID from a YouTube URL.
 *
 * Recognizes a `v=` query parameter, `youtu.be/<id>` short links and
 * `/embed/<id>` paths. The ID must be exactly 11 `[A-Za-z0-9_-]` characters:
 * a longer run is rejected instead of being silently truncated, which would
 * otherwise resolve to a different video.
 *
 * @param url - URL to inspect.
 * @returns The video ID, or `null` when no pattern matches.
 */
function extractVideoId(url: string): string | null {
  const patterns = [
    /[?&]v=([\w-]{11})(?![\w-])/,
    /youtu\.be\/([\w-]{11})(?![\w-])/,
    /embed\/([\w-]{11})(?![\w-])/,
  ];
  for (const pattern of patterns) {
    const id = pattern.exec(url)?.[1];
    if (id) return id;
  }
  return null;
}
|
| |
|
| |
|
/**
 * Locates a runnable yt-dlp and returns the command string used to invoke it.
 *
 * Search order:
 *  1. `yt-dlp` on PATH.
 *  2. Candidate directories. On Windows: the Scripts directories derived from
 *     `python -m pip show` and `pip show`, then every `Python<version>` install
 *     under the per-user Local/Roaming roots (newest first), then Microsoft
 *     Store Python package caches. Elsewhere: common bin directories.
 *  3. The `yt_dlp` Python module (`python`, and `python3` on non-Windows).
 *
 * Every candidate is verified with `checkCommand` before it is returned.
 *
 * @param emitProgress - Receives human-readable status messages.
 * @returns `'yt-dlp'`, a double-quoted absolute path, a `<python> -m yt_dlp`
 *          command, or `null` when nothing usable was found.
 */
async function findYtdlp(emitProgress: ProgressEmitter): Promise<string | null> {
  const isWin = process.platform === 'win32';
  const exe = isWin ? 'yt-dlp.exe' : 'yt-dlp';

  if (await checkCommand('yt-dlp')) return 'yt-dlp';

  emitProgress('yt-dlp not on PATH. Searching Python Scripts directories...');

  const home = os.homedir();

  // Entries of `root` accepted by `keep`; a missing or unreadable root yields [].
  const listEntries = (root: string, keep: (name: string) => boolean): string[] => {
    try {
      return fs.readdirSync(root).filter(keep);
    } catch {
      // Best-effort discovery: an inaccessible directory is simply skipped.
      return [];
    }
  };

  // Numeric part of a `Python313`-style directory name, used for newest-first ordering.
  const pythonVersion = (name: string): number => Number(/^Python(\d+)/i.exec(name)?.[1] ?? 0);

  // Asks pip where yt-dlp is installed and derives the sibling Scripts directory.
  // Windows only: stderr is discarded through `2>nul`.
  const pipScriptsDir = (pipCmd: string): string | null => {
    try {
      const output = execSync(`${pipCmd} show yt-dlp 2>nul`, { encoding: 'utf-8', timeout: 5000 });
      const location = /Location:\s*(.+)/i.exec(output)?.[1]?.trim();
      return location ? path.join(path.dirname(location), 'Scripts') : null;
    } catch {
      // pip missing, package not installed, or the probe timed out.
      return null;
    }
  };

  const candidateDirs: string[] = [];

  if (isWin) {
    // pip-reported locations take priority, `python -m pip` ahead of bare `pip`.
    for (const pipCmd of ['python -m pip', 'pip']) {
      const scriptsDir = pipScriptsDir(pipCmd);
      if (scriptsDir) candidateDirs.push(scriptsDir);
    }

    // Per-user Python installs: discover whatever versions exist instead of a fixed list.
    const installRoots = [
      path.join(home, 'AppData', 'Local', 'Programs', 'Python'),
      path.join(home, 'AppData', 'Roaming', 'Python'),
    ];
    for (const root of installRoots) {
      const versions = listEntries(root, (name) => /^Python\d+/i.test(name))
        .sort((a, b) => pythonVersion(b) - pythonVersion(a));
      for (const version of versions) {
        candidateDirs.push(path.join(root, version, 'Scripts'));
      }
    }

    // Microsoft Store Python keeps user packages under LocalCache/local-packages.
    const packagesDir = path.join(home, 'AppData', 'Local', 'Packages');
    for (const pkg of listEntries(packagesDir, (name) => name.startsWith('PythonSoftwareFoundation.Python'))) {
      const localCache = path.join(packagesDir, pkg, 'LocalCache', 'local-packages');
      for (const pyDir of listEntries(localCache, (name) => name.startsWith('Python'))) {
        candidateDirs.push(path.join(localCache, pyDir, 'Scripts'));
      }
    }
  } else {
    candidateDirs.push(
      path.join(home, '.local', 'bin'),
      '/usr/local/bin',
      '/opt/homebrew/bin',
      '/usr/bin',
    );
  }

  // A Set drops duplicates (e.g. both pip probes reporting the same directory)
  // while keeping insertion order.
  for (const dir of new Set(candidateDirs)) {
    const fullPath = path.join(dir, exe);
    if (!fs.existsSync(fullPath)) continue;

    emitProgress(`Found yt-dlp at: ${fullPath}`);
    // Quoted because the command is later interpolated into a shell command line.
    const quoted = `"${fullPath}"`;
    if (await checkCommand(quoted)) return quoted;
  }

  // Last resort: yt-dlp installed as a Python module without a launcher script.
  const interpreters = isWin ? ['python'] : ['python', 'python3'];
  for (const interpreter of interpreters) {
    const moduleCmd = `${interpreter} -m yt_dlp`;
    if (await checkCommand(moduleCmd)) {
      emitProgress('Found yt-dlp as Python module.');
      return moduleCmd;
    }
  }

  return null;
}
|
| |
|
/**
 * Checks whether `cmd --version` runs successfully through the shell.
 *
 * The version flag is appended to the command string rather than passed as an
 * argument array, because combining an args array with `shell: true` only
 * concatenates the arguments and is deprecated in recent Node.js releases.
 * Output is ignored so the child can never block on a full pipe.
 *
 * @param cmd - Command string (may include quoting or leading arguments).
 * @returns `true` when the process exits with code 0 within 5 seconds;
 *          `false` on a non-zero exit, spawn failure or timeout. Never rejects.
 */
function checkCommand(cmd: string): Promise<boolean> {
  return new Promise((resolve) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Resolves at most once and cancels the pending timeout.
    const settle = (ok: boolean): void => {
      if (settled) return;
      settled = true;
      if (timer !== undefined) clearTimeout(timer);
      resolve(ok);
    };

    let proc: ReturnType<typeof spawn>;
    try {
      proc = spawn(`${cmd} --version`, { shell: true, stdio: 'ignore', windowsHide: true });
    } catch {
      // spawn can throw synchronously on invalid input; treat that as "not available".
      settle(false);
      return;
    }

    timer = setTimeout(() => {
      settle(false);
      try {
        proc.kill();
      } catch {
        // The process already exited; nothing left to clean up.
      }
    }, 5000);

    proc.on('close', (code) => settle(code === 0));
    proc.on('error', () => settle(false));
  });
}
|
| |
|
| |
|
/**
 * Downloads English subtitles with yt-dlp and converts them to plain text.
 *
 * yt-dlp is run through the shell with `--skip-download`, writing subtitle
 * files named after `videoId` into `OUTPUT_DIR`. The first `.vtt`/`.srt` file
 * whose name starts with the video ID is parsed, and the text is saved as
 * `<videoId>-transcript.txt`.
 *
 * @param ytdlpCmd - Command string used to start yt-dlp (as returned by `findYtdlp`).
 * @param url - URL supplied by the user. Passed to yt-dlp only when it consists
 *              of characters that are inert inside a double-quoted shell
 *              argument; otherwise a canonical watch URL built from `videoId`
 *              is used.
 * @param videoId - 11-character YouTube video ID.
 * @param emitProgress - Receives yt-dlp output and status messages.
 * @returns The transcript together with its download URL and file name.
 *          Rejects when the ID is malformed, yt-dlp exits non-zero, no
 *          subtitle file is produced, or file handling fails.
 */
function fetchWithYtdlp(ytdlpCmd: string, url: string, videoId: string, emitProgress: ProgressEmitter): Promise<ToolResult> {
  return new Promise((resolve, reject) => {
    // The ID ends up in a shell command line and in file names, so validate it here.
    if (!/^[\w-]{11}$/.test(videoId)) {
      reject(new Error('Invalid YouTube video ID.'));
      return;
    }

    const outTemplate = path.join(OUTPUT_DIR, videoId);

    // SECURITY: `url` is user input and the command runs through a shell. Only
    // characters with no special meaning inside double quotes (POSIX sh and
    // cmd.exe) are allowed through; anything else falls back to the canonical URL.
    const isShellInert = /^[\w\-.:/?=&]+$/.test(url);
    const targetUrl = isShellInert ? url : `https://www.youtube.com/watch?v=${videoId}`;

    const cmdLine = `${ytdlpCmd} --write-auto-sub --write-sub --sub-lang en,en-US,en-GB --skip-download --sub-format vtt/srt/best -o "${outTemplate}" "${targetUrl}"`;

    const child = spawn(cmdLine, { shell: true });
    let stderr = '';

    child.stdout?.on('data', (chunk: Buffer) => {
      const message = chunk.toString().trim();
      if (message) emitProgress(message);
    });
    child.stderr?.on('data', (chunk: Buffer) => {
      stderr += chunk.toString();
    });

    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`yt-dlp exited ${code}: ${stderr}`));
        return;
      }

      // File-system calls can throw; inside an event handler that would surface
      // as an uncaught exception, so failures are converted into a rejection.
      try {
        const files = fs.readdirSync(OUTPUT_DIR).filter((f) =>
          f.startsWith(videoId) && (f.endsWith('.vtt') || f.endsWith('.srt'))
        );
        const subtitleFile = files[0];
        if (!subtitleFile) {
          reject(new Error('No subtitle file generated.'));
          return;
        }

        const subContent = fs.readFileSync(path.join(OUTPUT_DIR, subtitleFile), 'utf-8');
        const text = subtitleFile.endsWith('.srt') ? parseSrt(subContent) : parseVtt(subContent);
        const fname = `${videoId}-transcript.txt`;
        fs.writeFileSync(path.join(OUTPUT_DIR, fname), text, 'utf-8');

        emitProgress(`Transcript saved: ${fname} (${text.length} chars)`);
        resolve({ transcript: text, downloadUrl: `/api/download/${fname}`, filename: fname, method: 'yt-dlp' });
      } catch (err) {
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });

    child.on('error', reject);
  });
}
|
| |
|
| |
|
/**
 * Requests the transcript from YouTube's `youtubei/v1/get_transcript` endpoint.
 *
 * A JSON body with a WEB client context and the video ID is POSTed. Every
 * `simpleText` cue found under the response's `actions` is collected. A response
 * that cannot be parsed is treated like one with no cues. On success the
 * transcript is written to `<videoId>-transcript.txt` in `OUTPUT_DIR`.
 *
 * @param videoId - YouTube video ID.
 * @param emitProgress - Receives status messages.
 * @returns The transcript with its download URL and file name.
 * @throws Error when no cue text could be extracted.
 */
async function fetchWithInnertube(videoId: string, emitProgress: ProgressEmitter): Promise<ToolResult> {
  emitProgress('Fetching via YouTube Innertube API...');

  const clientContext = {
    clientName: 'WEB',
    clientVersion: '2.20240101.00.00',
    hl: 'en',
    gl: 'US',
  };
  const payload = JSON.stringify({ context: { client: clientContext }, videoId });

  const rawResponse = await httpPost(
    'https://www.youtube.com/youtubei/v1/get_transcript?prettyPrint=false',
    payload,
    { 'Content-Type': 'application/json' }
  );

  const cueTexts: string[] = [];
  try {
    const parsed = JSON.parse(rawResponse);
    for (const action of parsed?.actions || []) {
      const cueGroups = action?.updateEngagementPanelAction?.content?.transcriptRenderer
        ?.body?.transcriptBodyRenderer?.cueGroups;
      if (!cueGroups) continue;

      for (const group of cueGroups) {
        const cues = group?.transcriptCueGroupRenderer?.cues;
        if (!cues) continue;

        for (const cue of cues) {
          const simpleText = cue?.transcriptCueRenderer?.cue?.simpleText;
          if (simpleText) cueTexts.push(simpleText.trim());
        }
      }
    }
  } catch {
    // Unparseable or unexpectedly shaped response: whatever was collected so
    // far is kept, and an empty list triggers the error below.
  }

  if (cueTexts.length === 0) {
    throw new Error('Innertube returned no transcript data.');
  }

  const transcript = cueTexts.join('\n');
  const filename = `${videoId}-transcript.txt`;
  fs.writeFileSync(path.join(OUTPUT_DIR, filename), transcript, 'utf-8');
  emitProgress(`Transcript saved: ${filename} (${transcript.length} chars, ${cueTexts.length} lines)`);

  return { transcript, downloadUrl: `/api/download/${filename}`, filename, method: 'innertube' };
}
|
| |
|
| |
|
/**
 * Scrapes caption tracks from the YouTube watch page and downloads one of them.
 *
 * The page HTML is searched for a `"captionTracks"` JSON array. The array is
 * extracted with a bracket-balanced, string-aware scan, so nested arrays or
 * `]` characters inside strings do not truncate it; the earlier regex patterns
 * remain as a fallback. Track preference: manual English, any `en`, any
 * `en-*`, then the first track. Captions are fetched as `json3` first and as
 * XML when that yields no text. The result is written to
 * `<videoId>-transcript.txt` in `OUTPUT_DIR`.
 *
 * @param videoId - YouTube video ID.
 * @param emitProgress - Receives status messages.
 * @returns The transcript with its download URL and file name.
 * @throws Error when the page is empty/blocked, no track or track URL is found,
 *         or the downloaded captions contain no text.
 */
async function fetchFromPage(videoId: string, emitProgress: ProgressEmitter): Promise<ToolResult> {
  type CaptionTrack = {
    baseUrl?: string;
    languageCode?: string;
    kind?: string;
    name?: { simpleText?: string };
  };

  // Returns the balanced `[...]` literal starting at `start` (which must point
  // at `[`), ignoring brackets inside string literals; null if it never closes.
  const sliceJsonArray = (source: string, start: number): string | null => {
    let depth = 0;
    let inString = false;
    for (let i = start; i < source.length; i++) {
      const ch = source[i];
      if (inString) {
        if (ch === '\\') {
          i++; // skip the escaped character
        } else if (ch === '"') {
          inString = false;
        }
      } else if (ch === '"') {
        inString = true;
      } else if (ch === '[') {
        depth++;
      } else if (ch === ']') {
        depth--;
        if (depth === 0) return source.slice(start, i + 1);
      }
    }
    return null;
  };

  emitProgress('Fetching YouTube page for caption tracks...');

  const html = await httpGet(`https://www.youtube.com/watch?v=${videoId}`, {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cookie': 'CONSENT=YES+cb.20210328-17-p0.en+FX+999',
  });

  if (!html || html.length < 1000) {
    throw new Error('YouTube returned empty or blocked page.');
  }

  let tracks: CaptionTrack[] | null = null;

  // Preferred: balanced extraction at each `"captionTracks": [` occurrence.
  const keyPattern = /"captionTracks"\s*:\s*\[/g;
  for (let hit = keyPattern.exec(html); hit && !tracks; hit = keyPattern.exec(html)) {
    const literal = sliceJsonArray(html, hit.index + hit[0].length - 1);
    if (!literal) continue;
    try {
      const parsed: unknown = JSON.parse(literal);
      if (Array.isArray(parsed)) tracks = parsed as CaptionTrack[];
    } catch {
      // Not valid JSON at this occurrence; try the next one.
    }
  }

  // Fallback: the original non-greedy patterns, tried in order.
  if (!tracks) {
    const legacyPatterns = [
      /"captionTracks"\s*:\s*(\[.*?\])/s,
      /captionTracks.*?(\[.*?\])/s,
      /"playerCaptionsTracklistRenderer"\s*:\s*\{.*?"captionTracks"\s*:\s*(\[.*?\])/s,
    ];
    for (const pattern of legacyPatterns) {
      const captured = html.match(pattern)?.[1];
      if (!captured) continue;
      try {
        tracks = JSON.parse(captured);
        break;
      } catch {
        // Captured text was not valid JSON; try the next pattern.
      }
    }
  }

  if (!tracks || tracks.length === 0) {
    throw new Error('No caption tracks found in page HTML.');
  }

  const enTrack =
    tracks.find((t) => t.languageCode === 'en' && !t.kind) ||
    tracks.find((t) => t.languageCode === 'en') ||
    tracks.find((t) => t.languageCode?.startsWith('en')) ||
    tracks[0];

  const baseUrl = enTrack?.baseUrl;
  if (!enTrack || !baseUrl) {
    throw new Error('No usable caption track URL.');
  }

  emitProgress(`Found captions: ${enTrack.name?.simpleText || enTrack.languageCode} (${enTrack.kind || 'manual'})`);

  let text = '';
  try {
    const json3Url = baseUrl + (baseUrl.includes('?') ? '&' : '?') + 'fmt=json3';
    const json3Response = await httpGet(json3Url, { 'User-Agent': 'Mozilla/5.0' });
    text = parseJson3Captions(json3Response);
  } catch {
    // json3 is only the preferred format; the XML fallback below takes over.
  }

  if (!text) {
    const xmlResponse = await httpGet(baseUrl, { 'User-Agent': 'Mozilla/5.0' });
    text = parseXmlCaptions(xmlResponse);
  }

  if (!text.trim()) {
    throw new Error('Caption content is empty after parsing.');
  }

  const fname = `${videoId}-transcript.txt`;
  fs.writeFileSync(path.join(OUTPUT_DIR, fname), text, 'utf-8');
  emitProgress(`Transcript saved: ${fname} (${text.length} chars)`);

  return { transcript: text, downloadUrl: `/api/download/${fname}`, filename: fname, method: 'page-scrape' };
}
|
| |
|
| |
|
/**
 * Performs an HTTP(S) GET request and resolves with the response body as text.
 *
 * Redirects (3xx with a `Location` header) are followed up to `redirectsLeft`
 * times; relative `Location` values are resolved against the current URL. The
 * body is decoded as UTF-8 by the stream, so multi-byte characters split
 * across chunks are not corrupted. The HTTP status is not otherwise inspected:
 * non-redirect responses resolve with whatever body was sent.
 *
 * @param url - Absolute http:// or https:// URL.
 * @param headers - Extra request headers; they override the default User-Agent
 *                  and are re-sent on redirects.
 * @param redirectsLeft - Maximum number of redirects still allowed (default 5).
 * @returns The response body.
 *          Rejects on an invalid URL, network error, idle timeout (15 s) or
 *          too many redirects.
 */
function httpGet(url: string, headers: Record<string, string> = {}, redirectsLeft = 5): Promise<string> {
  return new Promise((resolve, reject) => {
    const parsed = new URL(url);
    const client = parsed.protocol === 'https:' ? https : http;
    const opts = {
      hostname: parsed.hostname,
      port: parsed.port || undefined,
      path: parsed.pathname + parsed.search,
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        ...headers,
      },
    };

    const req = client.request(opts, (res) => {
      const status = res.statusCode ?? 0;
      const location = res.headers.location;

      if (status >= 300 && status < 400 && location) {
        res.resume(); // discard the redirect body so the socket is released
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        let nextUrl: string;
        try {
          // `Location` may be relative; resolve it against the current URL.
          nextUrl = new URL(location, url).toString();
        } catch {
          reject(new Error(`Invalid redirect location: ${location}`));
          return;
        }
        httpGet(nextUrl, headers, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk: string) => { data += chunk; });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });

    // Abort instead of hanging forever when the server stops responding.
    req.setTimeout(15000, () => {
      req.destroy(new Error(`Request to ${parsed.hostname} timed out`));
    });
    req.on('error', reject);
    req.end();
  });
}
|
| |
|
/**
 * Sends an HTTPS POST request and resolves with the response body as text.
 *
 * The request always goes through the `https` module (port 443 unless the URL
 * names one). `Content-Type: application/json`, `Content-Length` and a generic
 * User-Agent are set by default and can be overridden through `headers`. The
 * body is decoded as UTF-8 by the stream, so multi-byte characters split
 * across chunks are not corrupted. The HTTP status is not inspected.
 *
 * @param url - Absolute URL of the endpoint.
 * @param body - Request payload, sent as-is.
 * @param headers - Extra request headers.
 * @returns The response body.
 *          Rejects on an invalid URL, network error or idle timeout (15 s).
 */
function httpPost(url: string, body: string, headers: Record<string, string> = {}): Promise<string> {
  return new Promise((resolve, reject) => {
    const parsed = new URL(url);
    const opts = {
      hostname: parsed.hostname,
      port: parsed.port || 443,
      path: parsed.pathname + parsed.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
        'User-Agent': 'Mozilla/5.0',
        ...headers,
      },
    };

    const req = https.request(opts, (res) => {
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk: string) => { data += chunk; });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });

    // Abort instead of hanging forever when the server stops responding.
    req.setTimeout(15000, () => {
      req.destroy(new Error(`Request to ${parsed.hostname} timed out`));
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
|
| |
|
| |
|
/**
 * Converts WebVTT subtitle content into plain transcript text.
 *
 * Dropped: blank lines, the `WEBVTT` header, `Kind:` / `Language:` / `NOTE`
 * lines, cue timing lines (`-->`), and digit-only cue identifiers. A
 * digit-only line is treated as an identifier only when the timing line
 * follows directly, so numeric caption text such as "2024" is kept. Inline
 * tags are stripped and `&nbsp;`, `&lt;`, `&gt;`, `&amp;` are decoded. Lines
 * already seen anywhere earlier are omitted.
 *
 * @param vtt - Raw WebVTT file content.
 * @returns Unique caption lines joined with `\n` (empty string if none).
 */
function parseVtt(vtt: string): string {
  const rawLines = vtt.split('\n').map((l) => l.trim());
  const seen = new Set<string>();
  const out: string[] = [];

  rawLines.forEach((line, i) => {
    if (!line || line === 'WEBVTT' || line.includes('-->')) return;
    if (line.startsWith('Kind:') || line.startsWith('Language:') || line.startsWith('NOTE')) return;
    if (/^\d+$/.test(line) && (rawLines[i + 1] ?? '').includes('-->')) return;

    const text = line
      .replace(/<[^>]+>/g, '')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      // `&amp;` goes last so that `&amp;lt;` yields the literal text `&lt;`.
      .replace(/&amp;/g, '&')
      .trim();

    if (text && !seen.has(text)) {
      seen.add(text);
      out.push(text);
    }
  });

  return out.join('\n');
}
|
| |
|
/**
 * Converts SubRip (SRT) subtitle content into plain transcript text.
 *
 * Dropped: blank lines, timing lines (`-->`) and cue sequence numbers. A
 * digit-only line counts as a sequence number only when the timing line
 * follows directly, so numeric caption text such as "2024" is kept. Inline
 * tags are stripped, and lines already seen anywhere earlier are omitted.
 *
 * @param srt - Raw SRT file content (LF or CRLF line endings).
 * @returns Unique caption lines joined with `\n` (empty string if none).
 */
function parseSrt(srt: string): string {
  const rawLines = srt.split('\n').map((l) => l.trim());
  const seen = new Set<string>();
  const out: string[] = [];

  rawLines.forEach((line, i) => {
    if (!line || line.includes('-->')) return;
    if (/^\d+$/.test(line) && (rawLines[i + 1] ?? '').includes('-->')) return;

    const text = line.replace(/<[^>]+>/g, '').trim();
    if (text && !seen.has(text)) {
      seen.add(text);
      out.push(text);
    }
  });

  return out.join('\n');
}
|
| |
|
/**
 * Extracts caption text from XML made of `<text ...>...</text>` elements.
 *
 * The content of each element is entity-decoded, any markup left after
 * decoding is stripped, and the result is trimmed. Elements that end up empty
 * are skipped.
 *
 * @param xml - Raw caption XML.
 * @returns Caption lines joined with `\n` (empty string if none).
 */
function parseXmlCaptions(xml: string): string {
  const elements = xml.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/g);
  return Array.from(elements, (hit) => decodeEntities(hit[1]).replace(/<[^>]+>/g, '').trim())
    .filter((caption) => caption)
    .join('\n');
}
|
| |
|
/**
 * Extracts caption text from a `json3` caption response.
 *
 * Expects an object with an `events` array. For each event that has a `segs`
 * array, the `utf8` strings of the segments are concatenated and trimmed.
 * Malformed events or segments (null, missing `segs`, non-string `utf8`) are
 * skipped individually rather than discarding the whole transcript.
 *
 * @param json - Raw response text.
 * @returns Caption lines joined with `\n`; an empty string when the input is
 *          not valid JSON or contains no `events` array.
 */
function parseJson3Captions(json: string): string {
  let data: unknown;
  try {
    data = JSON.parse(json);
  } catch {
    return '';
  }

  const events = (data as { events?: unknown } | null)?.events;
  if (!Array.isArray(events)) return '';

  const lines: string[] = [];
  for (const event of events) {
    const segs = (event as { segs?: unknown } | null)?.segs;
    if (!Array.isArray(segs)) continue;

    const text = segs
      .map((seg) => (typeof seg?.utf8 === 'string' ? seg.utf8 : ''))
      .join('')
      .trim();
    if (text) lines.push(text);
  }
  return lines.join('\n');
}
|
| |
|
/**
 * Decodes the HTML/XML entities that occur in caption text.
 *
 * Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and numeric
 * references in decimal (`&#39;`) or hexadecimal (`&#x27;`) form, including
 * code points above U+FFFF. Numeric references outside the Unicode range are
 * left untouched.
 *
 * NOTE(review): `&amp;` is decoded first, as in the original replacement
 * order, so double-escaped input such as `&amp;#39;` collapses to `'`. This
 * looks intentional for caption XML — confirm before reordering.
 *
 * @param str - Text containing entities.
 * @returns The decoded text.
 */
function decodeEntities(str: string): string {
  // Converts a numeric reference, keeping the original text when the value is
  // not a valid Unicode code point.
  const fromCode = (code: number, original: string): string =>
    Number.isInteger(code) && code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : original;

  return str
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&apos;/g, "'")
    .replace(/&#x([0-9a-f]+);/gi, (whole, hex: string) => fromCode(parseInt(hex, 16), whole))
    .replace(/&#(\d+);/g, (whole, num: string) => fromCode(parseInt(num, 10), whole));
}
|
| |
|