Spaces:
Runtime error
Runtime error
| import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN, getCorpora, getLinksRepoPath, getDocRepoPath } from '../../../utils/config.js'; | |
/**
 * Builds the headers sent with every dataset fetch.
 *
 * The Authorization header is attached only when `process.env.HF_TOKEN` is
 * set, so a missing token is never sent as the literal "Bearer undefined".
 *
 * @returns {Record<string, string>} Headers object to pass to `fetch`.
 */
function buildAuthHeaders() {
  const token = process.env.HF_TOKEN;
  return token ? { Authorization: `Bearer ${token}` } : {};
}

/**
 * Decides whether a dataset mention counts towards progress.
 *
 * A mention is excluded only when it is tagged `non-dataset` AND its
 * `dataset_name.judge_agrees` flag is exactly `true`.
 *
 * @param {object} ds - One entry of a page's `datasets` array.
 * @returns {boolean} `true` when the mention should be counted.
 */
function isCountedMention(ds) {
  const isConfirmedNonDataset =
    ds.dataset_tag === 'non-dataset' && ds.dataset_name?.judge_agrees === true;
  return !isConfirmedNonDataset;
}

/**
 * Computes the progress counters for a single document.
 *
 * Pages with no counted mentions are ignored entirely. A page is "completed"
 * when every counted mention on it has `human_validated === true`.
 *
 * @param {Array<object>} pagesData - Parsed document JSON (one entry per page).
 * @returns {{totalPages: number, completedPages: number, totalMentions: number,
 *            verifiedMentions: number, humanAnnotations: number}} Counters.
 */
function summarizeDocPages(pagesData) {
  // Key order here is the order the counters appear in the JSON response.
  const stats = {
    totalPages: 0,
    completedPages: 0,
    totalMentions: 0,
    verifiedMentions: 0,
    humanAnnotations: 0,
  };

  for (const page of pagesData) {
    const mentions = (page.datasets || []).filter(isCountedMention);
    if (mentions.length === 0) continue;

    const verifiedOnPage = mentions.filter((ds) => ds.human_validated === true).length;
    const humanOnPage = mentions.filter((ds) => ds.source === 'human').length;

    stats.totalPages += 1;
    stats.totalMentions += mentions.length;
    stats.verifiedMentions += verifiedOnPage;
    stats.humanAnnotations += humanOnPage;
    if (verifiedOnPage === mentions.length) {
      stats.completedPages += 1;
    }
  }

  return stats;
}

/**
 * Fetches one document and turns it into a progress record.
 *
 * @param {object} corpus - Corpus descriptor; `corpus.id` is copied to the record.
 * @param {object} link - Links-index entry; `link.index` identifies the document.
 * @returns {Promise<object|null>} The progress record, or `null` when the
 *   document could not be fetched or its payload is not an array of pages.
 */
async function fetchDocProgress(corpus, link) {
  const docUrl = `${HF_DATASET_BASE_URL}/raw/main/${getDocRepoPath(corpus, link.index)}`;
  const docRes = await fetch(docUrl, { headers: buildAuthHeaders() });
  if (!docRes.ok) return null;

  const pagesData = await docRes.json();
  // A non-array payload cannot be scanned page by page; skip it explicitly
  // rather than relying on an iteration error (or iterating a string).
  if (!Array.isArray(pagesData)) return null;

  const stats = summarizeDocPages(pagesData);
  return {
    corpus: corpus.id,
    index: link.index,
    ...stats,
    complete: stats.totalPages > 0 && stats.completedPages === stats.totalPages,
  };
}

/**
 * GET /api/progress
 * Returns progress stats across ALL corpora.
 *
 * For each corpus the links index is fetched, the entries with
 * `status === 'success'` and `has_revalidation === true` are kept (at most
 * MAX_DOCS_TO_SCAN of them), and each corresponding document is scanned.
 * A corpus whose index is unavailable or malformed, and a document that fails
 * to load, are skipped with a warning instead of failing the whole request.
 *
 * @returns {Promise<Response>} 200 with the JSON summary (`totalDocs`,
 *   `completedDocs`, `totalPages`, `completedPages`, `totalMentions`,
 *   `verifiedMentions`, `humanAnnotations`, `docs`), or 500 with
 *   `{ error: 'Failed to compute progress' }` on an unexpected failure.
 */
export async function GET() {
  try {
    const allDocs = [];

    // Corpora are handled one after another; the documents of a single corpus
    // are fetched concurrently.
    for (const corpus of getCorpora()) {
      const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/${getLinksRepoPath(corpus)}`;
      const linksRes = await fetch(linksUrl, {
        headers: buildAuthHeaders(),
        // NOTE(review): presumably the Next.js fetch cache option (seconds) — confirm.
        next: { revalidate: 300 },
      });
      if (!linksRes.ok) {
        console.warn(
          `Progress API: links index for corpus "${corpus.id}" returned HTTP ${linksRes.status}; skipping`
        );
        continue;
      }

      const links = await linksRes.json();
      if (!Array.isArray(links)) {
        // Without this guard `links.filter` throws and one malformed index
        // turns the whole endpoint into a 500.
        console.warn(
          `Progress API: links index for corpus "${corpus.id}" is not an array; skipping`
        );
        continue;
      }

      const activeLinks = links
        .filter((l) => l.status === 'success' && l.has_revalidation === true)
        .slice(0, MAX_DOCS_TO_SCAN);

      const results = await Promise.allSettled(
        activeLinks.map((link) => fetchDocProgress(corpus, link))
      );

      results.forEach((result, position) => {
        if (result.status === 'rejected') {
          console.warn(
            `Progress API: failed to scan doc ${activeLinks[position].index} of corpus "${corpus.id}":`,
            result.reason
          );
          return;
        }
        if (result.value !== null) {
          allDocs.push(result.value);
        }
      });
    }

    const sumOf = (key) => allDocs.reduce((total, doc) => total + doc[key], 0);
    const summary = {
      totalDocs: allDocs.length,
      completedDocs: allDocs.filter((doc) => doc.complete).length,
      totalPages: sumOf('totalPages'),
      completedPages: sumOf('completedPages'),
      totalMentions: sumOf('totalMentions'),
      verifiedMentions: sumOf('verifiedMentions'),
      humanAnnotations: sumOf('humanAnnotations'),
      docs: allDocs,
    };

    return new Response(JSON.stringify(summary), {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
        'Cache-Control': 'public, s-maxage=300, stale-while-revalidate=59',
      },
    });
  } catch (error) {
    console.error('Progress API error:', error);
    return new Response(
      JSON.stringify({ error: 'Failed to compute progress' }),
      { status: 500, headers: { 'Content-Type': 'application/json' } }
    );
  }
}