#!/usr/bin/env python3
"""
validate.py — Pre-submission Validation Script
===============================================
Runs all checklist items before submitting to the competition.
All checks must pass or the submission will be disqualified.

Usage
-----
    # With server already running:
    python validate.py

    # Start server automatically:
    python validate.py --start-server

    # Custom server URL:
    python validate.py --url http://localhost:7860
"""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
import time
import ssl
import urllib.request
import urllib.error
from pathlib import Path

# ── ANSI colours ───────────────────────────────────────────────────────────
GREEN = "\033[32m"
RED = "\033[31m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
BOLD = "\033[1m"
RESET = "\033[0m"

# Running tallies, updated by ok() / fail() / warn() and read by the summary.
PASS_COUNT = 0
FAIL_COUNT = 0
WARN_COUNT = 0

# Handle of the server subprocess when the validator started one itself.
_SERVER_PROC = None


def _emit(text: str) -> None:
    """Print *text*, degrading gracefully on consoles that cannot encode it.

    The status glyphs and box-drawing characters used below are not
    representable in legacy console encodings; a bare ``print`` would raise
    ``UnicodeEncodeError`` there and abort the whole validation run.  In that
    case the unencodable characters are replaced (with ``?``) and the line is
    printed anyway.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))


def ok(msg: str) -> None:
    """Record a passed check and print *msg* with a green tick."""
    global PASS_COUNT
    PASS_COUNT += 1
    _emit(f" {GREEN}✓{RESET} {msg}")


def fail(msg: str) -> None:
    """Record a failed check and print *msg* with a red cross."""
    global FAIL_COUNT
    FAIL_COUNT += 1
    _emit(f" {RED}✗{RESET} {msg}")


def warn(msg: str) -> None:
    """Record a warning (not counted as a failure) and print *msg*."""
    global WARN_COUNT
    WARN_COUNT += 1
    _emit(f" {YELLOW}!{RESET} {msg}")


def section(title: str) -> None:
    """Print a highlighted heading that introduces a group of checks."""
    _emit(f"\n{CYAN}{BOLD}── {title} ──{RESET}")


# SSL context used for https:// URLs.  Hostname checking and certificate
# verification are switched OFF so that HF Space / Let's Encrypt certs are
# accepted on all Python versions.
_SSL_CTX = ssl.create_default_context()
# Trade-off: with these two settings the peer's certificate is not verified at
# all, so only point --url at hosts you trust.
_SSL_CTX.check_hostname = False
_SSL_CTX.verify_mode = ssl.CERT_NONE


def http_get(url: str, timeout: int = 10) -> dict:
    """GET *url* and return the response body parsed as JSON.

    Raises whatever ``urllib`` / ``json`` raise on connection, HTTP or
    decoding errors; callers report those as failed checks.
    """
    req = urllib.request.Request(url, method="GET")
    ctx = _SSL_CTX if url.startswith("https://") else None
    with urllib.request.urlopen(req, timeout=timeout, context=ctx) as r:
        return json.loads(r.read())


def http_post(url: str, body: dict | None = None, timeout: int = 30) -> dict:
    """POST *body* as JSON to *url* and return the parsed JSON response.

    ``body=None`` sends an empty request body.  An explicitly passed dict is
    always serialised, including the empty dict (sent as ``{}``).
    """
    data = json.dumps(body).encode() if body is not None else b""
    req = urllib.request.Request(
        url,
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    ctx = _SSL_CTX if url.startswith("https://") else None
    with urllib.request.urlopen(req, timeout=timeout, context=ctx) as r:
        return json.loads(r.read())


# ── Private helpers shared by the checks ───────────────────────────────────

def _ensure_package_parent_on_path() -> None:
    """Put the directory above this script on ``sys.path`` (once).

    The checks import the project as the ``payops_env`` package, which has to
    be resolved from the parent directory of the one holding this file.
    """
    parent = str(Path(__file__).parent.parent)
    if parent not in sys.path:
        sys.path.insert(0, parent)


def _approx_equal(actual, expected: float, tol: float = 1e-9) -> bool:
    """Return True when *actual* is numerically equal to *expected*.

    Uses an absolute tolerance instead of ``==`` so that values produced by
    float arithmetic are not rejected because of rounding.  Non-numeric
    values (e.g. ``None``) compare unequal instead of raising.
    """
    import math

    try:
        return math.isclose(actual, expected, rel_tol=0.0, abs_tol=tol)
    except TypeError:
        return False


def _unwrap_observation(payload: dict) -> dict:
    """Return ``payload["observation"]`` when it is a dict, else *payload*.

    Supports both the wrapped ``{"observation": {...}}`` response format and
    the legacy flat format.
    """
    inner = payload.get("observation")
    return inner if isinstance(inner, dict) else payload


def _heavy_imports(content: str, libs: list) -> list:
    """Return the entries of *libs* that *content* imports as a module.

    Only real ``import X`` / ``from X ...`` statements at the start of a line
    are matched, and only on a whole module name, so ``import torchvision``
    is not reported as ``torch``.
    """
    import re

    found = []
    for lib in libs:
        pattern = re.compile(
            rf"^[ \t]*(?:import|from)[ \t]+{re.escape(lib)}\b", re.MULTILINE
        )
        if pattern.search(content):
            found.append(lib)
    return found


def _port_from_url(base_url: str, default: str = "7860") -> str:
    """Extract the TCP port from *base_url* as a string.

    Falls back to *default* when the URL carries no (valid) explicit port.
    """
    from urllib.parse import urlsplit

    # Without a scheme urlsplit() would treat "host:port" as "scheme:path".
    target = base_url if "://" in base_url else f"//{base_url}"
    try:
        port = urlsplit(target).port
    except ValueError:
        port = None
    return str(port) if port is not None else default


def _run_pattern_checks(content: str, checks: list, missing_fmt: str) -> None:
    """Report ok/fail for each ``(pattern, label)`` substring check.

    *missing_fmt* is formatted with ``label`` and ``pattern`` for failures.
    """
    for pattern, label in checks:
        if pattern in content:
            ok(label)
        else:
            fail(missing_fmt.format(label=label, pattern=pattern))


# ═══════════════════════════════════════════════════════════════════════════
# CHECK FUNCTIONS
# ═══════════════════════════════════════════════════════════════════════════

def check_repo_structure():
    """Check 1: every required file exists next to this script."""
    section("1. Repository Structure")
    root = Path(__file__).parent
    required_files = [
        ("inference.py", "Inference script (root)"),
        ("openenv.yaml", "OpenEnv spec"),
        ("Dockerfile", "Docker build file"),
        ("requirements.txt", "Python dependencies"),
        ("environment.py", "Environment implementation"),
        ("grader.py", "Grader implementation"),
        ("models.py", "Typed models"),
        ("tasks.py", "Task bank"),
        ("server/app.py", "FastAPI server"),
    ]
    for filename, label in required_files:
        if (root / filename).exists():
            ok(f"{label} found: {filename}")
        else:
            fail(f"{label} MISSING: {filename}")

    # inference.py must be in root (reported as its own checklist item)
    if (root / "inference.py").exists():
        ok("inference.py is in root directory")
    else:
        fail("inference.py must be in the ROOT directory")


def check_openenv_yaml():
    """Check 2: openenv.yaml contains every required substring."""
    section("2. openenv.yaml Spec Compliance")
    yaml_path = Path(__file__).parent / "openenv.yaml"
    if not yaml_path.exists():
        fail("openenv.yaml not found — cannot validate")
        return

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    content = yaml_path.read_text(encoding="utf-8")
    checks = [
        ("spec_version", "spec_version field"),
        ("name:", "name field"),
        ("version:", "version field"),
        ("type: space", "type=space"),
        ("runtime: fastapi", "runtime=fastapi"),
        ("port: 7860", "port=7860 (HF Space)"),
        ("POST /reset", "reset endpoint declared"),
        ("POST /step", "step endpoint declared"),
        ("GET /state", "state endpoint declared"),
        ("GET /health", "health endpoint declared"),
        ("API_BASE_URL", "API_BASE_URL env var declared"),
        ("MODEL_NAME", "MODEL_NAME env var declared"),
        ("HF_TOKEN", "HF_TOKEN env var declared"),
        ("count: 30", "tasks count=30"),
        ("inference.py", "inference script reference"),
        ("approve", "approve action"),
        ("reject", "reject action"),
        ("inspect", "inspect action"),
        ("partial_credit: true", "partial credit enabled"),
    ]
    _run_pattern_checks(
        content, checks, "Missing in openenv.yaml: {label} (pattern: '{pattern}')"
    )


def check_typed_models():
    """Check 3: the typed models import and accept every action type."""
    section("3. Typed Models")
    _ensure_package_parent_on_path()
    try:
        from payops_env.models import PayOpsAction, PayOpsObservation, PayOpsState

        ok("PayOpsAction importable")
        ok("PayOpsObservation importable")
        ok("PayOpsState importable")

        # Check action types
        action = PayOpsAction(action_type="approve", transaction_id="TXN-TEST")
        ok(f"PayOpsAction instantiates (action_type={action.action_type})")

        # Check all 10 action types are valid
        required_actions = [
            "approve", "reject", "flag", "escalate", "hold",
            "inspect", "request_docs", "verify_kyc", "contact_sender", "file_sar",
        ]
        for a in required_actions:
            try:
                PayOpsAction(action_type=a, transaction_id="TXN-TEST")
                ok(f"Action type '{a}' is valid")
            except Exception as e:
                fail(f"Action type '{a}' rejected: {e}")
    except ImportError as e:
        fail(f"Cannot import models: {e}")
    except Exception as e:
        # Any other error (e.g. a model validation error) is a failed check,
        # not a crash of the validator — same policy as the sibling checks.
        fail(f"Typed model check failed: {e}")


def check_environment():
    """Check 4: exercise reset / step / state on a local environment."""
    section("4. Environment (step / reset / state)")
    _ensure_package_parent_on_path()
    try:
        import asyncio

        from payops_env.environment import PayOpsEnvironment
        from payops_env.models import PayOpsAction

        env = PayOpsEnvironment()

        async def run_env_checks():
            # reset()
            obs = await env.reset_async()
            ok(f"reset() returns observation (task={obs.task_id})")
            if _approx_equal(obs.budget_remaining, 5.0):
                ok("reset() budget_remaining=5.0")
            else:
                fail(f"reset() budget_remaining expected 5.0, got {obs.budget_remaining}")

            # step() investigation
            obs2 = await env.step_async(
                PayOpsAction(action_type="inspect", transaction_id=obs.transaction_id)
            )
            if _approx_equal(obs2.reward, 0.15):
                ok("step(inspect) reward=0.15")
            else:
                warn(f"step(inspect) reward={obs2.reward} (expected 0.15)")
            if _approx_equal(obs2.budget_remaining, 4.9):
                ok("step(inspect) budget_remaining=4.9")
            else:
                warn(f"step(inspect) budget={obs2.budget_remaining}")
            if obs2.task_id == obs.task_id:
                ok("inspect does not advance task")
            else:
                fail("inspect advanced task (should not)")

            # step() terminal
            obs3 = await env.step_async(
                PayOpsAction(action_type="approve", transaction_id=obs.transaction_id)
            )
            ok(f"step(approve) reward={obs3.reward}")

            # state() — read straight from the environment's _state attribute
            state = env._state
            if state.step_count > 0:
                ok(f"state() step_count={state.step_count}")
            else:
                fail("state() step_count=0 after steps")
            if isinstance(state.investigation_actions_used, list):
                ok("state() investigation_actions_used is list")
            else:
                fail("state() investigation_actions_used is not a list")

        asyncio.run(run_env_checks())
    except Exception as e:
        fail(f"Environment check failed: {e}")


def check_grader():
    """Check 5: the task bank has >= 3 tasks and grading yields a score in [0, 1]."""
    section("5. Grader — 3+ tasks, scores in [0.0, 1.0]")
    _ensure_package_parent_on_path()
    try:
        from payops_env.grader import grade_episode
        from payops_env.tasks import TASKS

        if len(TASKS) >= 3:
            ok(f"Task bank has {len(TASKS)} tasks (>= 3 required)")
        else:
            fail(f"Task bank has only {len(TASKS)} tasks (need >= 3)")

        # Grade a minimal episode using the real grade_episode signature:
        #   grade_episode(actions, tasks, confidences, budget_limit)
        # Use correct terminal actions for first 5 tasks.
        sample_tasks = list(TASKS[:5])
        sample_actions = [t.correct_action for t in sample_tasks]
        result = grade_episode(sample_actions, sample_tasks, budget_limit=5.0)

        if 0.0 <= result.normalised_score <= 1.0:
            ok(f"grade_episode() score in [0,1]: {result.normalised_score:.4f}")
        else:
            fail(f"grade_episode() score out of range: {result.normalised_score}")

        # Check each task graded
        for pt in result.per_task_rewards:
            score = pt.get("weighted_reward", pt.get("reward", 0))
            # str(): a missing task_id (None) cannot take the ":12s" format.
            task_label = str(pt.get("task_id"))
            ok(f" {task_label:12s} reward={score:+.3f}")
        ok("grade_episode() completes without error")
    except Exception as e:
        fail(f"Grader check failed: {e}")


def check_server(base_url: str):
    """Check 6: probe the running server's HTTP endpoints at *base_url*.

    Returns early when ``/health`` is unreachable, because none of the other
    endpoints can be tested without a server.
    """
    section("6. Server Health & OpenEnv Endpoints")

    # Health / reset
    try:
        health = http_get(f"{base_url}/health")
        if health.get("status") == "ok":
            ok(f"GET /health → status=ok (v{health.get('version','?')})")
        else:
            fail(f"GET /health → unexpected: {health}")
    except Exception as e:
        fail(f"GET /health failed: {e}")
        return  # Can't continue without server

    # Transaction used for the /step probe.  Prefer the one /reset hands out;
    # keep the historical hard-coded id as a fallback.
    transaction_id = "TXN-E001"

    # reset() — must return 200 and a valid observation
    try:
        raw_reset = http_post(f"{base_url}/reset")
        obs = _unwrap_observation(raw_reset)
        if "task_id" in obs:
            ok(f"POST /reset → 200, task_id={obs['task_id']}")
        else:
            fail(f"POST /reset → missing task_id in response")
        if _approx_equal(obs.get("budget_remaining"), 5.0):
            ok("POST /reset → budget_remaining=5.0")
        else:
            fail(f"POST /reset → budget_remaining={obs.get('budget_remaining')}")
        transaction_id = obs.get("transaction_id") or transaction_id
    except Exception as e:
        fail(f"POST /reset failed: {e}")

    # step()
    try:
        raw_step = http_post(
            f"{base_url}/step",
            {"action_type": "inspect", "transaction_id": transaction_id},
        )
        step = _unwrap_observation(raw_step)
        # reward is at top level in both wrapped and flat formats
        reward_val = raw_step.get("reward", step.get("reward"))
        if reward_val is not None:
            ok(f"POST /step (inspect) → reward={reward_val}")
        else:
            fail("POST /step → missing reward in response")
    except Exception as e:
        fail(f"POST /step failed: {e}")

    # state()
    try:
        state = http_get(f"{base_url}/state")
        if "episode_id" in state:
            ok("GET /state → episode_id present")
        else:
            fail("GET /state → missing episode_id")
        if isinstance(state.get("investigation_actions_used"), list):
            ok("GET /state → investigation_actions_used is list")
        else:
            fail("GET /state → investigation_actions_used is not a list")
    except Exception as e:
        fail(f"GET /state failed: {e}")

    # tasks — either {"count": N, ...} or a bare list
    try:
        tasks = http_get(f"{base_url}/tasks")
        count = tasks.get("count", 0) if isinstance(tasks, dict) else len(tasks)
        if count >= 3:
            ok(f"GET /tasks → count={count} (>= 3 required)")
        else:
            fail(f"GET /tasks → count={count} (need >= 3)")
    except Exception as e:
        fail(f"GET /tasks failed: {e}")

    # grader
    try:
        grader = http_get(f"{base_url}/grader")
        score = grader.get("normalised_score", -1)
        if 0.0 <= score <= 1.0:
            ok(f"GET /grader → normalised_score={score:.4f} (in [0,1])")
        else:
            fail(f"GET /grader → score={score} out of range")
    except Exception as e:
        fail(f"GET /grader failed: {e}")

    # schema
    try:
        schema = http_get(f"{base_url}/schema")
        body = json.dumps(schema)
        for model in ["PayOpsAction", "PayOpsObservation"]:
            if model in body:
                ok(f"GET /schema → {model} present")
            else:
                fail(f"GET /schema → {model} missing")
    except Exception as e:
        fail(f"GET /schema failed: {e}")


def check_env_vars():
    """Check 7: warn about required environment variables that are unset.

    Values of non-secret variables are echoed truncated to 10 characters.
    Secrets are never echoed, not even partially: only their length is shown.
    """
    section("7. Required Environment Variables")
    # name -> (description, is_secret)
    required = {
        "API_BASE_URL": ("LLM API endpoint", False),
        "MODEL_NAME": ("Model identifier", False),
        "HF_TOKEN": ("API / HF token", True),
    }
    for var, (desc, is_secret) in required.items():
        val = os.environ.get(var, "")
        if not val:
            warn(f"{var} is NOT set ({desc}) — required at inference time")
            continue
        if is_secret:
            display = f"<hidden, {len(val)} chars>"
        else:
            display = val[:10] + "..." if len(val) > 10 else val
        ok(f"{var} is set ({desc}): {display}")


def check_inference_script():
    """Check 8: inference.py contains every required substring."""
    section("8. inference.py Validation")
    inf_path = Path(__file__).parent / "inference.py"
    if not inf_path.exists():
        fail("inference.py not found in root directory")
        return

    content = inf_path.read_text(encoding="utf-8")
    checks = [
        ("from openai import OpenAI", "Uses OpenAI client"),
        ("API_BASE_URL", "Reads API_BASE_URL env var"),
        ("MODEL_NAME", "Reads MODEL_NAME env var"),
        ("HF_TOKEN", "Reads HF_TOKEN env var"),
        ("chat.completions.create", "Uses chat.completions.create"),
        ("/reset", "Calls /reset endpoint"),
        ("/step", "Calls /step endpoint"),
        ("/grader", "Retrieves grader results"),
        ("normalised_score", "Reports normalised_score"),
    ]
    _run_pattern_checks(
        content, checks, "inference.py missing: {label} (pattern: '{pattern}')"
    )


def check_dockerfile():
    """Check 9: the Dockerfile contains the expected directives.

    Also runs ``docker build --check`` when docker is available; every
    outcome of that optional step other than success is only a warning.
    """
    section("9. Dockerfile")
    root = Path(__file__).parent
    df_path = root / "Dockerfile"
    if not df_path.exists():
        fail("Dockerfile not found")
        return

    content = df_path.read_text(encoding="utf-8")
    checks = [
        ("FROM python:", "Uses Python base image"),
        ("EXPOSE 7860", "Exposes port 7860 (HF Space)"),
        ("7860", "References port 7860"),
        ("uvicorn", "Starts uvicorn server"),
        ("HEALTHCHECK", "Has HEALTHCHECK"),
        ("requirements.txt", "Installs requirements.txt"),
        ("appuser", "Non-root user (security)"),
    ]
    _run_pattern_checks(content, checks, "Dockerfile missing: {label}")

    # Attempt docker build (dry-run, only if docker is available)
    try:
        result = subprocess.run(
            ["docker", "build", "--check", "-f", str(df_path), str(root)],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except FileNotFoundError:
        warn("docker not installed — skipping build check")
    except subprocess.TimeoutExpired:
        warn("docker build check timed out")
    except Exception as e:
        warn(f"docker check skipped: {e}")
    else:
        stderr = result.stderr or ""
        if result.returncode == 0:
            ok("docker build --check passed")
        elif "unknown flag" in stderr.lower():
            # --check flag not available in older Docker; try plain build
            warn(f"docker build --check not supported; run 'docker build .' manually")
        else:
            # The flag was accepted but the command failed: surface why
            # instead of claiming the flag is unsupported.
            output_lines = [ln for ln in (stderr or result.stdout or "").splitlines() if ln.strip()]
            detail = output_lines[-1].strip() if output_lines else "no output"
            warn(
                f"docker build --check exited with status {result.returncode} "
                f"({detail}); run 'docker build .' manually"
            )


def check_runtime_constraint():
    """Check 10: heuristics for the < 20 min / 2vCPU / 8GB runtime limit.

    Hardware limits cannot be measured here, so this only inspects
    inference.py for imports of heavy ML libraries and for a MAX_STEPS guard.
    """
    section("10. Runtime Constraint (< 20 min / 2vCPU / 8GB)")
    inf_path = Path(__file__).parent / "inference.py"
    if not inf_path.exists():
        warn("inference.py not found, skipping runtime check")
        return

    content = inf_path.read_text(encoding="utf-8")
    heavy_libs = ["torch", "transformers", "tensorflow", "keras", "jax"]
    imported = _heavy_imports(content, heavy_libs)
    for lib in imported:
        warn(f"inference.py imports '{lib}' — ensure it runs within 8GB RAM on 2vCPU")
    if not imported:
        # Only claim "lightweight" when no heavy library was actually found.
        ok("inference.py uses lightweight OpenAI client (no local model loading)")

    # Check no huge loops that would blow 20min
    if "MAX_STEPS" in content:
        ok("MAX_STEPS guard present")
    else:
        warn("Consider adding a MAX_STEPS guard in inference.py")


# ═══════════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════════

def _ensure_server(base_url: str, start_server: bool) -> bool:
    """Return True when the server at *base_url* answers ``/health``.

    When it does not and *start_server* is set, launch uvicorn as a
    subprocess (stored in ``_SERVER_PROC``) and poll for up to 15 seconds.
    """
    global _SERVER_PROC

    try:
        http_get(f"{base_url}/health", timeout=3)
        return True
    except Exception:
        pass

    if not start_server:
        warn(f"Server not reachable at {base_url}. Start it first, or pass --start-server")
        return False

    print(f"\n Starting server at {base_url}...")
    root = Path(__file__).parent.parent
    env = os.environ.copy()
    env["PYTHONPATH"] = str(root)
    port = _port_from_url(base_url)
    _SERVER_PROC = subprocess.Popen(
        [sys.executable, "-m", "uvicorn", "payops_env.server.app:app",
         "--host", "0.0.0.0", "--port", port],
        env=env,
        cwd=str(root),
    )
    for _ in range(15):
        time.sleep(1)
        if _SERVER_PROC.poll() is not None:
            break  # the server process already exited; polling is pointless
        try:
            http_get(f"{base_url}/health", timeout=2)
        except Exception:
            continue
        print(" Server started.")
        return True

    warn("Could not start server automatically")
    return False


def _stop_server() -> None:
    """Terminate the server subprocess started by this script, if any."""
    global _SERVER_PROC

    proc, _SERVER_PROC = _SERVER_PROC, None
    if proc is None or proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=5)  # reap the child so it does not linger
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


def _print_summary() -> None:
    """Print the pass / fail / warning totals."""
    total = PASS_COUNT + FAIL_COUNT
    print(f"\n{BOLD}{'='*60}")
    if FAIL_COUNT == 0:
        print(f"{GREEN} ALL CHECKS PASSED ({PASS_COUNT}/{total} passed, {WARN_COUNT} warnings){RESET}")
        print(f"{BOLD} Ready to submit! ✓{RESET}")
    else:
        print(f"{RED} {FAIL_COUNT} CHECK(S) FAILED{RESET} "
              f"({PASS_COUNT} passed, {FAIL_COUNT} failed, {WARN_COUNT} warnings)")
        print(f"{RED}{BOLD} Fix failures before submitting.{RESET}")
    print(f"{BOLD}{'='*60}{RESET}\n")


def main():
    """Run every check, print a summary and exit 0 (no failures) or 1."""
    parser = argparse.ArgumentParser(description="PayOps pre-submission validator")
    parser.add_argument("--url", default="http://localhost:7860",
                        help="PayOps server base URL (default: http://localhost:7860)")
    parser.add_argument("--start-server", action="store_true",
                        help="Start the server automatically before validating")
    args = parser.parse_args()
    base_url = args.url.rstrip("/")

    # The output uses non-ASCII glyphs; never let a legacy console encoding
    # abort the run with UnicodeEncodeError.
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(errors="replace")

    print(f"\n{BOLD}{'='*60}{RESET}")
    print(f"{BOLD} PayOps Pre-Submission Validator{RESET}")
    print(f" Target server : {base_url}")
    print(f"{'='*60}{RESET}\n")

    try:
        # Static checks (no server needed)
        check_repo_structure()
        check_openenv_yaml()
        check_typed_models()
        check_environment()
        check_grader()
        check_env_vars()
        check_inference_script()
        check_dockerfile()
        check_runtime_constraint()

        # Server-dependent checks
        # Try to reach the server; if not up, attempt to start
        if _ensure_server(base_url, args.start_server):
            check_server(base_url)
        else:
            section("6. Server Health & OpenEnv Endpoints")
            warn("Skipped — server not available")

        # ── Summary ────────────────────────────────────────────────────────
        _print_summary()
    finally:
        # Runs on every exit path (including Ctrl-C) so a server started by
        # this script is never left behind.
        _stop_server()

    sys.exit(0 if FAIL_COUNT == 0 else 1)


if __name__ == "__main__":
    main()