| """Test the adaptive router with easy, medium, and hard queries.""" |
|
|
| import json |
| import httpx |
|
|
| BASE = "http://localhost:8000" |
|
|
|
|
def chat(content, domain=None, max_tokens=200):
    """Send one user message to the chat-completions endpoint and return the reply.

    Args:
        content: Text placed in the single ``user`` message of the request.
        domain: Optional value sent as the ``domain`` field of the request
            body; the field is left out entirely when this is falsy.
        max_tokens: Value sent as the ``max_tokens`` field.

    Returns:
        The response body decoded from JSON.

    Raises:
        httpx.HTTPStatusError: If the server replies with a 4xx/5xx status.
    """
    body = {
        "messages": [{"role": "user", "content": content}],
        "max_tokens": max_tokens,
    }
    if domain:
        body["domain"] = domain
    r = httpx.post(f"{BASE}/v1/chat/completions", json=body, timeout=30)
    # Surface HTTP errors here; otherwise an error payload is returned as if
    # it were a completion and callers fail later on a missing key.
    r.raise_for_status()
    return r.json()
|
|
|
|
# Banner for the smoke-test run.
print("=== Testing Adaptive Router ===\n")

# Easy: a short greeting with a small token budget.
easy_prompt = "Hello!"
result = chat(easy_prompt, max_tokens=50)
print(f"Easy query -> {result['model']}")

# Medium: a small coding task, using chat()'s default domain and max_tokens.
medium_prompt = "Write a Python function to validate an email address."
result = chat(medium_prompt)
print(f"Medium query -> {result['model']}")

# Hard: a larger task, sent with an explicit domain and a bigger token budget.
hard_prompt = (
    "Implement a distributed consensus algorithm with Byzantine fault tolerance."
)
hard_options = {"domain": "fintech", "max_tokens": 300}
result = chat(hard_prompt, **hard_options)
print(f"Hard query -> {result['model']}")
|
|
| |
# Fetch the router's statistics and print a short report.
r = httpx.get(f"{BASE}/v1/router/stats")
# Stop on an HTTP error instead of failing below with a KeyError on an
# error payload that lacks the stats fields.
r.raise_for_status()
s = r.json()
print("\n=== Router Stats ===")
print(f"Total queries: {s['total_queries']}")
print(f"Local: {s['local_pct']}%")
print(f"Teacher: {s['teacher_pct']}%")
print(f"Self-verify pass rate: {s['self_verify_pass_rate']}%")
# The cost figure is printed with four decimal places.
print(f"Cost saved: ${s['estimated_cost_saved']:.4f}")
|
|