"""
HARMONIC STACK ORCHESTRATOR
Ghost in the Machine Labs

The complete 5-layer stack in one entry point.
Auto-configures from hardware profile.

Layers:
    1. Executive - Strategic decisions
    2. Operator + Directors - Task coordination
    3. Ethics Council - Oversight
    4. Workers - Parallel execution
    5. Specialists - Domain expertise
"""

import asyncio
import json
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx

from hardware_profiles import select_profile, get_profile, print_profile_summary
from worker_manager import WorkerPool, WorkTask, WorkResult, init_workers
from specialist_pool import (
    SpecialistPool,
    SpecialistTask,
    SpecialistResult,
    SpecialistType,
    init_specialists,
)

OLLAMA_URL = "http://localhost:11434"


@dataclass
class StackConfig:
    """Full stack configuration from hardware profile."""
    profile_name: str
    vram_gb: float

    # Layer 1: Executive
    executive_model: str
    executive_ctx: int

    # Layer 2: Operator + Directors
    operator_model: str
    directors: List[str]

    # Layer 3: Ethics Council
    council_enabled: bool
    council_model: str
    council_members: int

    # Layer 4: Workers
    worker_model: str
    worker_mode: str
    worker_count: int

    # Layer 5: Specialists
    specialists_enabled: bool
    specialist_model: str
    max_specialists: int


class HarmonicStack:
    """
    The complete Harmonic Stack.
    Auto-configures all 5 layers from a hardware profile.
    """

    def __init__(self, profile_name: Optional[str] = None):
        """Initialize all layers from the hardware profile."""
        if profile_name:
            # Explicit profile: actual VRAM is unknown, so report 0.
            self.profile_name = profile_name
            self.profile = get_profile(profile_name)
            self.vram_gb = 0.0
        else:
            # Auto-detect the best profile for this machine.
            self.profile_name, self.profile, self.vram_gb = select_profile()

        self.config = self._build_config()

        # Layer pools are created in start().
        self.worker_pool: Optional[WorkerPool] = None
        self.specialist_pool: Optional[SpecialistPool] = None

        self._ready = False

        print("\n" + "=" * 60)
        print("HARMONIC STACK")
        print("=" * 60)
        print_profile_summary(self.profile_name, self.profile, self.vram_gb)

    def _build_config(self) -> StackConfig:
        """Extract configuration from profile."""
        p = self.profile
        return StackConfig(
            profile_name=self.profile_name,
            vram_gb=self.vram_gb,
            executive_model=p["layer_1_executive"]["model"],
            executive_ctx=p["layer_1_executive"]["ctx"],
            operator_model=p["layer_2_operator"]["model"],
            directors=p["layer_2_operator"].get("directors", []),
            council_enabled=p["layer_3_council"]["enabled"],
            council_model=p["layer_3_council"].get("model", ""),
            council_members=p["layer_3_council"].get("members", 0),
            worker_model=p["layer_4_worker"]["model"],
            worker_mode=p["layer_4_worker"]["mode"],
            worker_count=p["layer_4_worker"].get("count", 1),
            specialists_enabled=p["layer_5_specialists"]["enabled"],
            specialist_model=p["layer_5_specialists"].get("model", ""),
            max_specialists=p["layer_5_specialists"].get("max_loaded", 0),
        )
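
    # The profile dict consumed above is expected to look roughly like this.
    # Illustrative shape only: real profiles live in hardware_profiles.py,
    # and every value shown here is a placeholder, not a recommendation.
    #
    #     {
    #         "layer_1_executive": {"model": "<model>", "ctx": 8192},
    #         "layer_2_operator": {"model": "<model>", "directors": ["<name>"]},
    #         "layer_3_council": {"enabled": True, "model": "<model>", "members": 3},
    #         "layer_4_worker": {"model": "<model>", "mode": "parallel", "count": 4},
    #         "layer_5_specialists": {"enabled": True, "model": "<model>", "max_loaded": 2},
    #     }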

    async def start(self):
        """Initialize and warm up all layers."""
        print("\n[STACK] Starting Harmonic Stack...")

        # Layer 4: worker pool.
        print("[STACK] Initializing Layer 4 (Workers)...")
        self.worker_pool = await init_workers(self.profile_name)

        # Layer 5: specialist pool.
        print("[STACK] Initializing Layer 5 (Specialists)...")
        self.specialist_pool = await init_specialists(self.profile_name)

        # Layer 1: pre-load the Executive so the first decision is fast.
        print("[STACK] Warming up Executive...")
        await self._warmup_model(self.config.executive_model)

        self._ready = True
        print("[STACK] Harmonic Stack ready\n")

    async def _warmup_model(self, model: str):
        """Pre-load a model with a minimal generation."""
        async with httpx.AsyncClient(timeout=120.0) as client:
            await client.post(
                f"{OLLAMA_URL}/api/generate",
                json={
                    "model": model,
                    "prompt": "Ready.",
                    "stream": False,
                    # A single predicted token is enough to force the load.
                    "options": {"num_predict": 1},
                },
            )

    async def _query_model(self, model: str, prompt: str, max_tokens: int = 1000) -> str:
        """Query a model directly."""
        async with httpx.AsyncClient(timeout=300.0) as client:
            response = await client.post(
                f"{OLLAMA_URL}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "stream": False,
                    "options": {"num_predict": max_tokens},
                },
            )
            # Surface HTTP errors instead of silently returning an empty string.
            response.raise_for_status()
            return response.json().get("response", "")
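
    # Non-streaming /api/generate responses from Ollama carry the generated
    # text in a "response" field, e.g. {"model": ..., "response": "...",
    # "done": true, ...}; the .get() above guards against an absent field.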

    async def executive_decide(self, query: str, context: str = "") -> str:
        """Layer 1: Executive strategic decision."""
        prompt = f"""You are the Executive of a Harmonic Stack AI system.
Make a strategic decision on how to handle this request.

Context: {context if context else 'None provided'}

Request: {query}

Decide:
1. What type of task is this? (code/writing/analysis/research/general)
2. Does it need specialist expertise?
3. Should it go to workers for parallel processing?
4. Does it require ethics review?

Respond with your decision and reasoning.
"""
        return await self._query_model(self.config.executive_model, prompt)

    async def work(self, prompt: str, context: str = "") -> WorkResult:
        """Layer 4: Execute work task."""
        if not self.worker_pool:
            raise RuntimeError("Stack not started")

        task = WorkTask(
            task_id=f"work_{datetime.now().timestamp()}",
            prompt=prompt,
            context=context,
        )
        return await self.worker_pool.execute(task)

    async def work_parallel(self, prompts: List[str]) -> List[WorkResult]:
        """Layer 4: Execute multiple tasks in parallel."""
        if not self.worker_pool:
            raise RuntimeError("Stack not started")

        tasks = [
            WorkTask(
                task_id=f"work_{i}_{datetime.now().timestamp()}",
                prompt=p,
            )
            for i, p in enumerate(prompts)
        ]
        return await self.worker_pool.execute_batch(tasks)

    async def specialist(
        self, specialist_type: SpecialistType, prompt: str, context: str = ""
    ) -> SpecialistResult:
        """Layer 5: Execute specialist task."""
        if not self.specialist_pool:
            raise RuntimeError("Stack not started")

        task = SpecialistTask(
            task_id=f"spec_{datetime.now().timestamp()}",
            specialist_type=specialist_type,
            prompt=prompt,
            context=context,
        )
        return await self.specialist_pool.execute(task)

    async def process(self, query: str, context: str = "") -> Dict[str, Any]:
        """
        Full stack processing:
        1. The Executive decides the approach.
        2. The request is routed to the appropriate layer.
        3. The result is returned.
        """
        if not self._ready:
            raise RuntimeError("Stack not started")

        start = datetime.now()

        # Layer 1: strategic decision.
        decision = await self.executive_decide(query, context)

        # Crude keyword routing over the Executive's free-text decision.
        needs_specialist = "specialist" in decision.lower()
        # Note: needs_parallel is currently unused here; single queries always
        # run on one worker, and batch work goes through work_parallel().
        needs_parallel = "parallel" in decision.lower() or "worker" in decision.lower()

        result = {
            "query": query,
            "executive_decision": decision,
            "route": "unknown",
            "response": "",
            "elapsed_ms": 0,
        }

        if needs_specialist:
            # Pick a specialist type from keywords in the decision;
            # analysis is the fallback.
            spec_type = SpecialistType.ANALYSIS
            if "code" in decision.lower():
                spec_type = SpecialistType.CODE
            elif "writ" in decision.lower():
                spec_type = SpecialistType.WRITING
            elif "research" in decision.lower():
                spec_type = SpecialistType.RESEARCH

            spec_result = await self.specialist(spec_type, query, context)
            result["route"] = f"specialist:{spec_type.value}"
            result["response"] = spec_result.response
        else:
            # Default route: a single worker handles the query.
            work_result = await self.work(query, context)
            result["route"] = "worker"
            result["response"] = work_result.response

        result["elapsed_ms"] = int((datetime.now() - start).total_seconds() * 1000)
        return result
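
    # Illustrative round trip (output is indicative only -- actual routing
    # depends on the wording of the Executive model's decision):
    #
    #     r = await stack.process("Write a sorting function")
    #     r["route"]       -> "specialist:code" (when specialist routing fires)
    #     r["response"]    -> the generated answer
    #     r["elapsed_ms"]  -> wall-clock time for the full round trip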

    def status(self) -> Dict[str, Any]:
        """Return full stack status."""
        return {
            "profile": self.profile_name,
            "ready": self._ready,
            "config": {
                "executive": self.config.executive_model,
                "workers": f"{self.config.worker_model} x {self.config.worker_count}",
                "specialists": (
                    f"{self.config.specialist_model} (max {self.config.max_specialists})"
                    if self.config.specialists_enabled
                    else "disabled"
                ),
            },
            "workers": self.worker_pool.status() if self.worker_pool else None,
            "specialists": self.specialist_pool.status() if self.specialist_pool else None,
        }


async def create_stack(profile_name: Optional[str] = None) -> HarmonicStack:
    """Create and start a Harmonic Stack."""
    stack = HarmonicStack(profile_name)
    await stack.start()
    return stack


if __name__ == "__main__":
    async def main():
        stack = await create_stack()

        print("\n" + "=" * 60)
        print("STACK TEST")
        print("=" * 60)

        print("\n[TEST 1] Simple question:")
        result = await stack.process("What is the capital of France?")
        print(f"  Route: {result['route']}")
        print(f"  Response: {result['response'][:200]}...")
        print(f"  Elapsed: {result['elapsed_ms']}ms")

        print("\n[TEST 2] Code request:")
        result = await stack.process("Write a Python function to reverse a string")
        print(f"  Route: {result['route']}")
        print(f"  Response: {result['response'][:300]}...")
        print(f"  Elapsed: {result['elapsed_ms']}ms")

        print("\n[TEST 3] Parallel work:")
        results = await stack.work_parallel([
            "What is 2+2?",
            "What is 3+3?",
            "What is 4+4?",
        ])
        for r in results:
            print(f"  Task {r.task_id}: {r.response[:50]}... ({r.elapsed_ms}ms)")

        print("\n[STATUS]")
        print(json.dumps(stack.status(), indent=2))

    asyncio.run(main())