Spaces:
Running
Running
| """ | |
| PatchHawk HTTP Server — OpenEnv-compliant environment server. | |
| Serves PatchHawkEnv over HTTP/WebSocket using openenv.core.create_app. | |
| Also includes the legacy A2A endpoints for backwards-compatibility. | |
| Run: | |
| python -m patchhawk.agent.server | |
| python -m patchhawk.agent.server --port 7860 | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from typing import Optional | |
| from pydantic import BaseModel, Field | |
| from openenv.core import create_app | |
| from patchhawk.agent.environment import PatchHawkEnv | |
| from patchhawk.env_models import PatchHawkAction, PatchHawkObservation | |
| # ── OpenEnv factory ─────────────────────────────────────────────── | |
def _env_factory() -> PatchHawkEnv:
    """Build a new PatchHawkEnv for ``create_app``.

    The scenarios file path is taken from the ``PATCHHAWK_SCENARIOS``
    environment variable and defaults to ``patchhawk/data/scenarios.json``.
    The environment is created with ``use_docker=False``.
    """
    return PatchHawkEnv(
        scenarios_path=os.environ.get(
            "PATCHHAWK_SCENARIOS", "patchhawk/data/scenarios.json"
        ),
        use_docker=False,
    )
| # ── OpenEnv app (primary) ───────────────────────────────────────── | |
# Primary application object: create_app receives the environment factory
# together with the action and observation models, and the result is what
# main() hands to uvicorn (via the `app` alias).
openenv_app = create_app(
    _env_factory, PatchHawkAction, PatchHawkObservation,
    env_name="PatchHawk",
)
| # ── Legacy A2A schemas ──────────────────────────────────────────── | |
class ActRequest(BaseModel):
    # Request payload for the legacy A2A "act" handler.
    # (Documented with comments rather than a docstring so the generated
    # model schema stays exactly as it was.)

    # Required: the source text the agent should inspect.
    code_snippet: str = Field(
        default=...,
        description="Python source code to analyse",
    )
class ActResponse(BaseModel):
    # Response payload produced by the legacy A2A "act" handler.
    # (Documented with comments rather than a docstring so the generated
    # model schema stays exactly as it was.)

    # Required: name of the last action the policy took.
    decision: str = Field(
        default=...,
        description="Agent decision (BLOCK_PR, SUBMIT_PATCH, etc.)",
    )
    # Optional patch text; defaults to None.
    patch: Optional[str] = Field(
        default=None,
        description="Proposed patch code, if any",
    )
    # Required confidence value.
    confidence: float = Field(
        default=...,
        description="Agent confidence score 0-1",
    )
    # Accumulated reward; defaults to 0.0.
    reward: float = Field(
        default=0.0,
        description="Total episode reward",
    )
    # Free-form extra information; defaults to None.
    details: Optional[dict] = None
class AgentCard(BaseModel):
    # Static self-description of the agent: every field has a default, so
    # AgentCard() with no arguments yields the complete card.
    # (Documented with comments rather than a docstring so the generated
    # model schema stays exactly as it was.)

    name: str = "PatchHawk"

    description: str = (
        "RL-powered supply-chain vulnerability detector and auto-patcher. "
        "Trained via GRPO on Qwen2.5-Coder-7B with Docker-sandboxed validation."
    )

    # Capability tags advertised by the card.
    capabilities: list = [
        "code_analysis",
        "vulnerability_detection",
        "auto_patching",
        "sandbox_execution",
    ]

    # JSON-schema style description of the accepted input: one required
    # string property, `code_snippet`.
    input_schema: dict = {
        "type": "object",
        "properties": {
            "code_snippet": {
                "type": "string",
                "description": "Python source code to analyse",
            },
        },
        "required": ["code_snippet"],
    }

    # JSON-schema style description of the produced output.
    output_schema: dict = {
        "type": "object",
        "properties": {
            "decision": {"type": "string"},
            "patch": {"type": "string", "nullable": True},
            "confidence": {"type": "number"},
            "reward": {"type": "number"},
        },
    }
| # ── Singleton env for A2A ───────────────────────────────────────── | |
# Module-level cache for the environment shared by the A2A handlers;
# stays None until _get_env() is called for the first time.
_env: Optional[PatchHawkEnv] = None


def _get_env() -> PatchHawkEnv:
    """Return the cached PatchHawkEnv, constructing it on first use.

    On the first call the environment is built from the path in the
    ``PATCHHAWK_SCENARIOS`` environment variable (default
    ``patchhawk/data/scenarios.json``) with ``use_docker=False`` and stored
    in the module-level ``_env``; later calls return that same object.
    """
    global _env
    if _env is not None:
        return _env

    _env = PatchHawkEnv(
        scenarios_path=os.environ.get(
            "PATCHHAWK_SCENARIOS", "patchhawk/data/scenarios.json"
        ),
        use_docker=False,
    )
    return _env
| # ── Mount A2A routes on the OpenEnv app ─────────────────────────── | |
# `app` is the name main() passes to uvicorn; it is the same object as
# `openenv_app`, not a copy.
app = openenv_app


# NOTE(review): no route decorator is visible on this handler in this file —
# confirm it is actually registered on `app`.
def agent_card():
    """Return a default-constructed AgentCard describing the agent."""
    card = AgentCard()
    return card
# NOTE(review): no route decorator is visible on this handler in this file —
# confirm it is actually registered on `app`.
def agent_act(request: ActRequest):
    """Run one heuristic episode over the submitted code snippet.

    The shared environment from ``_get_env()`` is reset with an ad-hoc
    scenario wrapping ``request.code_snippet``. Until the observation
    reports ``done``, an action is chosen from the current ``risk_score``:

    * above 0.6         -> ``ACTION_BLOCK_PR``
    * above 0.3         -> ``ACTION_EXECUTE_SANDBOX``
    * anything else     -> ``ACTION_REQUEST_REVIEW``

    Args:
        request: Payload carrying the code to analyse.

    Returns:
        An ``ActResponse`` whose ``decision`` is the name of the last action
        taken (or ``"ANALYZE"`` if the episode was already done after the
        reset), with ``patch`` always ``None``, a confidence derived from the
        accumulated reward and clamped to [0, 1], the rounded total reward,
        and the final observation's ``metadata`` as ``details``.
    """
    env = _get_env()

    # Wrap the caller's snippet in a scenario and force the env onto it.
    obs = env.reset(
        scenario={
            "id": "a2a_request",
            "type": "unknown",
            "label": "unknown",
            "code_snippet": request.code_snippet,
            "patch": None,
            "unit_test_code": None,
            "attack_type": None,
        }
    )

    episode_reward = 0.0
    last_action_type = None

    # NOTE(review): the loop has no step cap of its own; it relies on the
    # environment eventually setting `done` — confirm against PatchHawkEnv.
    while not obs.done:
        score = obs.risk_score
        # Rule-based baseline policy keyed on the risk score thresholds.
        action_type = (
            PatchHawkEnv.ACTION_BLOCK_PR
            if score > 0.6
            else PatchHawkEnv.ACTION_EXECUTE_SANDBOX
            if score > 0.3
            else PatchHawkEnv.ACTION_REQUEST_REVIEW
        )
        action = PatchHawkAction(action_type=action_type)

        obs = env.step(action)
        # A missing (falsy) reward counts as zero.
        episode_reward += obs.reward or 0.0
        last_action_type = action.action_type

    if last_action_type is None:
        decision = "ANALYZE"
    else:
        decision = PatchHawkEnv.ACTION_NAMES[last_action_type]

    # Map the reward onto a confidence centred at 0.5, clamped to [0, 1].
    raw_confidence = 0.5 + episode_reward / 6.0
    confidence = min(1.0, max(0.0, raw_confidence))

    return ActResponse(
        decision=decision,
        patch=None,
        confidence=round(confidence, 2),
        reward=round(episode_reward, 2),
        details=obs.metadata,
    )
| # ── CLI entry point ─────────────────────────────────────────────── | |
def main():
    """Start the server with uvicorn.

    Command line:
        --port PORT    TCP port to listen on (default: 8000). Both
                       ``--port 7860`` and ``--port=7860`` are accepted.

    The bind address comes from the ``HOST`` environment variable and
    defaults to ``0.0.0.0``.

    A ``--port`` flag with a missing, non-integer or out-of-range value now
    ends the process with a usage error instead of being silently ignored or
    surfacing as an unhandled ``ValueError``. Arguments other than
    ``--port`` are still ignored.
    """
    import argparse

    import uvicorn

    parser = argparse.ArgumentParser(
        prog="python -m patchhawk.agent.server",
        description="Serve PatchHawkEnv over HTTP/WebSocket.",
        allow_abbrev=False,  # only the exact spelling "--port" is recognised
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="port to listen on (default: %(default)s)",
    )

    # parse_known_args tolerates unrelated arguments, matching the previous
    # hand-written parser which only ever looked for "--port".
    options, _unrecognised = parser.parse_known_args(sys.argv[1:])
    if not 0 <= options.port <= 65535:
        parser.error(f"--port must be between 0 and 65535, got {options.port}")

    host = os.getenv("HOST", "0.0.0.0")
    uvicorn.run(app, host=host, port=options.port)


if __name__ == "__main__":
    main()