Spaces:
Sleeping
Sleeping
| from flask import Flask, render_template, request, jsonify | |
| from processor import DatasetCommandCenter | |
| import requests | |
| import threading | |
| import uuid | |
| import json | |
# Flask application object used by the script entry point at the bottom of this file.
app = Flask(__name__)
# In-memory job registry keyed by job id: execute() writes a status dict for
# each background job and status() reads it back.
jobs = {}
def index():
    """Render the ``index.html`` template and return the result."""
    page = render_template('index.html')
    return page
def analyze_metadata():
    """Return metadata for the dataset named in the JSON request body.

    Reads ``token`` and ``dataset_id`` from the request JSON, builds a
    ``DatasetCommandCenter`` with that token and returns the JSON-encoded
    result of its ``get_dataset_metadata`` call.
    """
    payload = request.json
    token = payload.get('token')
    command_center = DatasetCommandCenter(token=token)
    dataset_id = payload.get('dataset_id')
    metadata = command_center.get_dataset_metadata(dataset_id)
    return jsonify(metadata)
def get_splits():
    """Return the splits of one dataset configuration.

    Reads ``token``, ``dataset_id`` and ``config`` from the request JSON and
    returns the JSON-encoded result of
    ``DatasetCommandCenter.get_splits_for_config``.
    """
    payload = request.json
    token = payload.get('token')
    command_center = DatasetCommandCenter(token=token)
    dataset_id = payload.get('dataset_id')
    config_name = payload.get('config')
    splits = command_center.get_splits_for_config(dataset_id, config_name)
    return jsonify(splits)
def inspect_rows():
    """Return the inspection result for one dataset/config/split.

    Reads ``token``, ``dataset_id``, ``config`` and ``split`` from the
    request JSON and returns the JSON-encoded result of
    ``DatasetCommandCenter.inspect_dataset``.
    """
    payload = request.json
    token = payload.get('token')
    command_center = DatasetCommandCenter(token=token)
    dataset_id = payload.get('dataset_id')
    config_name = payload.get('config')
    split_name = payload.get('split')
    inspection = command_center.inspect_dataset(dataset_id, config_name, split_name)
    return jsonify(inspection)
def preview():
    """Run a preview of a transform recipe and return the resulting rows.

    Reads ``token``, ``dataset_id``, ``config``, ``split`` and ``recipe``
    (defaulting to an empty dict when the key is absent) from the request
    JSON, prints the request to stdout for debugging, and calls
    ``DatasetCommandCenter.preview_transform``.

    Returns:
        ``{"status": "success", "rows": rows}`` on success, or
        ``{"status": "error", "message": <str(exception)>}`` when anything
        inside the guarded section raises.
    """
    import traceback

    payload = request.json
    recipe = payload.get('recipe', {})

    # DEBUG LOGGING
    banner = "=" * 60
    print(banner)
    print("PREVIEW REQUEST RECEIVED")
    print(banner)
    labelled_keys = (
        ("Dataset:", 'dataset_id'),
        ("Config:", 'config'),
        ("Split:", 'split'),
    )
    for label, key in labelled_keys:
        print(label, payload.get(key))
    print("Recipe:")
    print(json.dumps(recipe, indent=2))
    print(banner)

    command_center = DatasetCommandCenter(token=payload.get('token'))
    try:
        dataset_id = payload.get('dataset_id')
        config_name = payload.get('config')
        split_name = payload.get('split')
        rows = command_center.preview_transform(
            dataset_id, config_name, split_name, recipe
        )
        # Kept inside the try so a failure in len() or jsonify() is also
        # reported through the error payload below.
        row_count = len(rows)
        print(f"Preview successful: {row_count} rows returned")
        return jsonify({"status": "success", "rows": rows})
    except Exception as e:
        message = str(e)
        print(f"PREVIEW ERROR: {message}")
        traceback.print_exc()
        return jsonify({"status": "error", "message": message})
def execute():
    """Start a background process-and-push job and return its id.

    Reads ``token``, ``dataset_id``, ``config``, ``split``, ``target_id``,
    ``recipe`` (defaulting to an empty dict when the key is absent),
    ``max_rows`` and ``license`` from the request JSON, then runs
    ``DatasetCommandCenter.process_and_push`` on a worker thread. The job's
    state is kept in the module-level ``jobs`` dict under a fresh UUID4
    string.

    Returns:
        A JSON response ``{"status": "started", "job_id": <job id>}``.
    """
    import traceback

    data = request.json
    job_id = str(uuid.uuid4())
    token = data.get('token')
    center = DatasetCommandCenter(token=token)
    args = (
        data.get('dataset_id'),
        data.get('config'),
        data.get('split'),
        data.get('target_id'),
        data.get('recipe', {}),
        data.get('max_rows'),
        data.get('license'),
    )

    def task():
        """Run the job and record its final state in ``jobs``."""
        try:
            res = center.process_and_push(*args)
            jobs[job_id] = {"status": "completed", "result": res}
        except Exception as e:
            # The worker thread is the outermost boundary for this job, so
            # print the traceback here; otherwise only str(e) would survive.
            traceback.print_exc()
            jobs[job_id] = {"status": "failed", "error": str(e)}

    # Register the job before the thread starts: a status lookup made right
    # after this response must see "running" rather than "unknown".
    jobs[job_id] = {"status": "running"}
    threading.Thread(target=task).start()
    return jsonify({"status": "started", "job_id": job_id})
def status(job_id):
    """Return the recorded state of a job as JSON.

    Args:
        job_id: Key to look up in the module-level ``jobs`` dict.

    Returns:
        The stored job dict, or ``{"status": "unknown"}`` when ``job_id``
        has no entry.
    """
    fallback = {"status": "unknown"}
    record = jobs.get(job_id, fallback)
    return jsonify(record)
if __name__ == '__main__':
    # Local import: only the script entry point needs the environment lookup.
    import os

    # The server binds to every interface, and Flask's debug mode enables the
    # Werkzeug interactive debugger, which lets a client run arbitrary code.
    # Debug mode is therefore opt-in: set FLASK_DEBUG=1 for local development.
    debug_flag = os.environ.get("FLASK_DEBUG", "").strip().lower()
    debug_enabled = debug_flag in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, port=7860, host="0.0.0.0")