import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN, getCorpora, getLinksRepoPath, getDocRepoPath } from '../../../utils/config.js';

/**
 * Builds the request headers shared by every upstream fetch.
 *
 * The Authorization header is only included when HF_TOKEN is set; an unset
 * token would otherwise be interpolated as the literal string
 * "Bearer undefined".
 *
 * @returns {Record<string, string>} Headers to pass to `fetch`.
 */
function buildAuthHeaders() {
  const token = process.env.HF_TOKEN;
  return token ? { Authorization: `Bearer ${token}` } : {};
}

/**
 * Decides whether a dataset mention counts towards progress.
 *
 * Mentions tagged 'non-dataset' whose `dataset_name.judge_agrees` is true are
 * excluded; everything else is counted.
 *
 * @param {object} ds - One entry of a page's `datasets` array.
 * @returns {boolean} True when the mention should be counted.
 */
function isCountedMention(ds) {
  return !(ds.dataset_tag === 'non-dataset' && ds.dataset_name?.judge_agrees === true);
}

/**
 * Tallies page and mention counters for one document.
 *
 * Pages without any counted mention are ignored entirely. A page is complete
 * when every counted mention on it has `human_validated === true`.
 *
 * @param {Array<object>} pagesData - Parsed pages of a document.
 * @returns {{totalPages: number, completedPages: number, totalMentions: number,
 *   verifiedMentions: number, humanAnnotations: number}} Counters for the document.
 */
function tallyPages(pagesData) {
  const totals = {
    totalPages: 0,
    completedPages: 0,
    totalMentions: 0,
    verifiedMentions: 0,
    humanAnnotations: 0,
  };

  for (const page of pagesData) {
    const mentions = (page.datasets || []).filter(isCountedMention);
    if (mentions.length === 0) continue;

    const verifiedOnPage = mentions.filter((ds) => ds.human_validated === true).length;

    totals.totalPages += 1;
    totals.totalMentions += mentions.length;
    totals.verifiedMentions += verifiedOnPage;
    totals.humanAnnotations += mentions.filter((ds) => ds.source === 'human').length;
    if (verifiedOnPage === mentions.length) {
      totals.completedPages += 1;
    }
  }

  return totals;
}

/**
 * Fetches one document and computes its progress record.
 *
 * @param {object} corpus - Corpus descriptor as returned by `getCorpora()`.
 * @param {object} link - Entry of the corpus link index; `link.index` identifies the document.
 * @param {Record<string, string>} headers - Headers for the upstream request.
 * @returns {Promise<object|null>} The progress record, or null when the
 *   document request does not succeed.
 * @throws {TypeError} When the document payload is not an array of pages.
 */
async function fetchDocProgress(corpus, link, headers) {
  const docUrl = `${HF_DATASET_BASE_URL}/raw/main/${getDocRepoPath(corpus, link.index)}`;
  const docRes = await fetch(docUrl, { headers });
  if (!docRes.ok) return null;

  const pagesData = await docRes.json();
  if (!Array.isArray(pagesData)) {
    // A JSON string is iterable and would otherwise be counted as an empty document.
    throw new TypeError(`Expected an array of pages at ${docUrl}`);
  }

  const totals = tallyPages(pagesData);
  return {
    corpus: corpus.id,
    index: link.index,
    ...totals,
    complete: totals.totalPages > 0 && totals.completedPages === totals.totalPages,
  };
}

/**
 * Collects progress records for every active document of one corpus.
 *
 * A corpus whose link index cannot be retrieved, or is not an array, yields an
 * empty list so the remaining corpora are still reported. Documents that fail
 * individually are logged and left out.
 *
 * @param {object} corpus - Corpus descriptor as returned by `getCorpora()`.
 * @param {Record<string, string>} headers - Headers for the upstream requests.
 * @returns {Promise<Array<object>>} Progress records, in link-index order.
 */
async function fetchCorpusProgress(corpus, headers) {
  const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/${getLinksRepoPath(corpus)}`;
  const linksRes = await fetch(linksUrl, {
    headers,
    // `next.revalidate` is the framework fetch-cache hint, in seconds.
    next: { revalidate: 300 },
  });
  if (!linksRes.ok) {
    console.warn(`Progress API: link index request failed (${linksRes.status}) for ${linksUrl}`);
    return [];
  }

  const links = await linksRes.json();
  if (!Array.isArray(links)) {
    console.warn(`Progress API: link index is not an array for ${linksUrl}`);
    return [];
  }

  const activeLinks = links
    .filter((link) => link?.status === 'success' && link.has_revalidation === true)
    .slice(0, MAX_DOCS_TO_SCAN);

  // allSettled: one broken document must not discard the rest of the corpus.
  const results = await Promise.allSettled(
    activeLinks.map((link) => fetchDocProgress(corpus, link, headers))
  );

  const docs = [];
  for (const result of results) {
    if (result.status === 'rejected') {
      console.warn('Progress API: skipping document:', result.reason);
    } else if (result.value !== null) {
      docs.push(result.value);
    }
  }
  return docs;
}

/**
 * GET /api/progress
 *
 * Returns progress stats across ALL corpora: per-document records plus totals
 * for documents, pages, mentions, verified mentions and human annotations.
 *
 * @returns {Promise<Response>} 200 with the JSON summary, or 500 with
 *   `{ error: 'Failed to compute progress' }` when an unexpected error occurs.
 */
export async function GET() {
  try {
    const headers = buildAuthHeaders();
    const allDocs = [];

    // Corpora are processed one at a time; each corpus already fans out one
    // request per document.
    for (const corpus of getCorpora()) {
      const docs = await fetchCorpusProgress(corpus, headers);
      allDocs.push(...docs);
    }

    const sumOf = (key) => allDocs.reduce((sum, doc) => sum + doc[key], 0);

    const summary = {
      totalDocs: allDocs.length,
      completedDocs: allDocs.filter((doc) => doc.complete).length,
      totalPages: sumOf('totalPages'),
      completedPages: sumOf('completedPages'),
      totalMentions: sumOf('totalMentions'),
      verifiedMentions: sumOf('verifiedMentions'),
      humanAnnotations: sumOf('humanAnnotations'),
      docs: allDocs,
    };

    return new Response(JSON.stringify(summary), {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
        'Cache-Control': 'public, s-maxage=300, stale-while-revalidate=59',
      },
    });
  } catch (error) {
    console.error('Progress API error:', error);
    return new Response(
      JSON.stringify({ error: 'Failed to compute progress' }),
      { status: 500, headers: { 'Content-Type': 'application/json' } }
    );
  }
}