import { NextResponse } from 'next/server';
import fs from 'fs';
import { commit } from '@huggingface/hub';
import {
  HF_DATASET_ID,
  HF_DATASET_BASE_URL,
  getCorpus,
  getDocRepoPath,
  getDocLocalPath,
} from '../../../utils/config.js';

/**
 * True when running inside an HF Space deployment: an HF token is present
 * and we are not in local development. Coerced to a real boolean (the
 * original returned the raw token string when truthy).
 */
const isHFSpace = () =>
  Boolean(process.env.HF_TOKEN) && process.env.NODE_ENV !== 'development';

/**
 * Load a document's pages array, from the HF dataset repo when deployed as
 * a Space, otherwise from the local filesystem.
 *
 * @param {object} corpus - corpus descriptor from getCorpus()
 * @param {number|string} document_index - document identifier within the corpus
 * @returns {Promise<{pagesData?: Array, errorResponse?: NextResponse}>}
 *   Exactly one of `pagesData` (the parsed JSON array) or `errorResponse`
 *   (a ready-to-return 404 response) is set.
 */
async function loadPagesData(corpus, document_index) {
  if (isHFSpace()) {
    const repoPath = getDocRepoPath(corpus, document_index);
    const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
    const res = await fetch(url, {
      headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` },
    });
    if (!res.ok) {
      return {
        errorResponse: NextResponse.json(
          { error: `Document not found on HF (${corpus.id})` },
          { status: 404 }
        ),
      };
    }
    return { pagesData: await res.json() };
  }

  const filePath = getDocLocalPath(corpus, document_index);
  if (!fs.existsSync(filePath)) {
    return {
      errorResponse: NextResponse.json(
        { error: `Document not found locally (${corpus.id})` },
        { status: 404 }
      ),
    };
  }
  return { pagesData: JSON.parse(fs.readFileSync(filePath, 'utf-8')) };
}

/**
 * Find the page whose first page number matches `page_number` and validate
 * that `dataset_index` is in range for that page's datasets.
 *
 * @returns {{pageIdx?: number, errorResponse?: NextResponse}}
 *   `pageIdx` on success; otherwise a ready-to-return 404/400 response.
 */
function locateDataset(pagesData, page_number, dataset_index) {
  // Pages are matched on the first entry of document.pages — presumably the
  // canonical page number for single-page entries (TODO confirm against writer).
  const pageIdx = pagesData.findIndex((p) => p.document?.pages?.[0] === page_number);
  if (pageIdx === -1) {
    return {
      errorResponse: NextResponse.json(
        { error: `Page ${page_number} not found` },
        { status: 404 }
      ),
    };
  }
  const datasets = pagesData[pageIdx].datasets || [];
  if (dataset_index < 0 || dataset_index >= datasets.length) {
    return {
      errorResponse: NextResponse.json(
        { error: `Dataset index ${dataset_index} out of range` },
        { status: 400 }
      ),
    };
  }
  return { pageIdx };
}

/**
 * Persist a document's pages array: commit to the HF dataset repo when
 * deployed as a Space, otherwise overwrite the local JSON file.
 *
 * @param {string} commitTitle - commit message used only for the HF path
 */
async function savePagesData(corpus, document_index, pagesData, commitTitle) {
  const content = JSON.stringify(pagesData, null, 2);
  if (isHFSpace()) {
    const repoPath = getDocRepoPath(corpus, document_index);
    await commit({
      repo: { type: 'dataset', name: HF_DATASET_ID },
      credentials: { accessToken: process.env.HF_TOKEN },
      title: commitTitle,
      operations: [{
        operation: 'addOrUpdate',
        path: repoPath,
        content: new Blob([content], { type: 'application/json' }),
      }],
    });
  } else {
    fs.writeFileSync(getDocLocalPath(corpus, document_index), content);
  }
}

/**
 * PUT /api/validate
 * Body: { corpus, document_index, page_number, dataset_index, updates }
 *
 * Applies `updates` to one dataset entry. If `updates` contains any
 * validation field, the update is recorded as a per-annotator entry in the
 * dataset's `validations` array (upsert keyed on annotator); otherwise the
 * fields are shallow-merged into the dataset entry.
 */
export async function PUT(request) {
  try {
    const { corpus: corpusId, document_index, page_number, dataset_index, updates } =
      await request.json();
    const corpus = getCorpus(corpusId);
    // `== null` deliberately matches both null and undefined; 0 is a valid index.
    if (document_index == null || page_number == null || dataset_index == null || !updates) {
      return NextResponse.json(
        { error: 'Missing document_index, page_number, dataset_index, or updates' },
        { status: 400 }
      );
    }

    const { pagesData, errorResponse: loadError } = await loadPagesData(corpus, document_index);
    if (loadError) return loadError;

    const { pageIdx, errorResponse: locateError } = locateDataset(pagesData, page_number, dataset_index);
    if (locateError) return locateError;

    const currentEntry = pagesData[pageIdx].datasets[dataset_index];
    const annotator = updates.annotator || 'unknown';
    const validationFields = ['human_validated', 'human_verdict', 'human_notes', 'annotator', 'validated_at'];
    const isValidation = validationFields.some((f) => f in updates);

    if (isValidation) {
      // Per-annotator validation: replace this annotator's existing entry or append.
      const validations = currentEntry.validations || [];
      const existingIdx = validations.findIndex((v) => v.annotator === annotator);
      const validationEntry = {
        human_validated: updates.human_validated,
        human_verdict: updates.human_verdict,
        human_notes: updates.human_notes || null,
        annotator,
        validated_at: updates.validated_at || new Date().toISOString(),
      };
      if (existingIdx >= 0) {
        validations[existingIdx] = validationEntry;
      } else {
        validations.push(validationEntry);
      }
      pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, validations };
    } else {
      // Non-validation update: shallow-merge the provided fields.
      pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, ...updates };
    }

    await savePagesData(
      corpus,
      document_index,
      pagesData,
      `Validate ${corpus.id}/doc_${document_index} page ${page_number}`
    );

    return NextResponse.json({
      success: true,
      dataset: pagesData[pageIdx].datasets[dataset_index],
    });
  } catch (error) {
    console.error('Validate error:', error);
    return NextResponse.json({ error: 'Failed to validate: ' + error.message }, { status: 500 });
  }
}

/**
 * DELETE /api/validate?corpus=X&doc=X&page=Y&idx=Z
 *
 * Removes one dataset entry (by index) from the identified page and
 * persists the modified document.
 */
export async function DELETE(request) {
  try {
    const { searchParams } = new URL(request.url);
    const corpusId = searchParams.get('corpus');
    const document_index = parseInt(searchParams.get('doc'), 10);
    const page_number = parseInt(searchParams.get('page'), 10);
    const dataset_index = parseInt(searchParams.get('idx'), 10);
    const corpus = getCorpus(corpusId);
    if (isNaN(document_index) || isNaN(page_number) || isNaN(dataset_index)) {
      return NextResponse.json(
        { error: 'Missing doc, page, or idx parameter' },
        { status: 400 }
      );
    }

    const { pagesData, errorResponse: loadError } = await loadPagesData(corpus, document_index);
    if (loadError) return loadError;

    const { pageIdx, errorResponse: locateError } = locateDataset(pagesData, page_number, dataset_index);
    if (locateError) return locateError;

    pagesData[pageIdx].datasets.splice(dataset_index, 1);

    await savePagesData(
      corpus,
      document_index,
      pagesData,
      `Delete from ${corpus.id}/doc_${document_index} page ${page_number}`
    );

    return NextResponse.json({ success: true });
  } catch (error) {
    console.error('Delete error:', error);
    return NextResponse.json({ error: 'Failed to delete: ' + error.message }, { status: 500 });
  }
}