"""
PatchHawk typed models — OpenEnv-compliant Pydantic models.
Extends openenv.core base types (Action, Observation, State) so the
environment is fully compatible with the OpenEnv framework.
"""
from typing import Optional, List
from pydantic import BaseModel, ConfigDict, Field
from openenv.core import Action, Observation, State
# ── Observation ──────────────────────────────────────────────────────
class PatchHawkObservation(Observation):
    """Observation returned by PatchHawkEnv after reset() and step().

    Carries the code under review plus precomputed static-analysis
    signals and, when available, output from the last sandbox run.
    """

    # Python source code the agent must analyse.
    code_snippet: str = Field(default="", description="Python source code to analyse")
    # One entry per static heuristic; presumably 1 = pattern detected — confirm against the env.
    static_flags: List[int] = Field(
        default_factory=list,
        description="Binary flags indicating static risk patterns",
    )
    # Constrained to [0, 1] to enforce the documented score range.
    risk_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Precomputed heuristic risk score 0-1",
    )
    # None until a sandbox execution has produced output.
    sandbox_telemetry: Optional[str] = Field(
        default=None, description="Output from previous sandbox execution"
    )
# ── Action ───────────────────────────────────────────────────────────
class PatchHawkAction(Action):
    """Action submitted to PatchHawkEnv.step().

    action_type values:
        0 = ANALYZE
        1 = EXECUTE_SANDBOX
        2 = BLOCK_PR
        3 = SUBMIT_PATCH
        4 = REQUEST_REVIEW
    """

    # Constrained to the five defined action codes so out-of-range
    # integers fail validation instead of reaching the environment.
    action_type: int = Field(
        ...,
        ge=0,
        le=4,
        description="0: ANALYZE, 1: EXECUTE_SANDBOX, 2: BLOCK_PR, "
        "3: SUBMIT_PATCH, 4: REQUEST_REVIEW",
    )
    # Only meaningful when action_type is 3 (SUBMIT_PATCH).
    patch_content: Optional[str] = Field(
        default=None, description="The unified context patch if action is SUBMIT_PATCH"
    )
    # Free-text rationale accompanying the chosen action.
    reasoning: Optional[str] = Field(
        default=None, description="Explanation of the vulnerability and chosen action"
    )
    # Bounded to the documented [0.0, 1.0] range when provided.
    predicted_risk: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="LLM predicted risk score (0.0 to 1.0)",
    )
# ── State ────────────────────────────────────────────────────────────
class PatchHawkState(State):
    """Internal state of a PatchHawkEnv episode."""

    # Identifier of the scenario currently loaded.
    scenario_id: str = Field(default="", description="Current scenario ID")
    # Task currently being played, or None before the first reset.
    current_task: Optional[str] = Field(
        default=None,
        description="Active task_id (easy_typosquat, medium_obfuscated, hard_patch)",
    )
    # Remembers the most recent action_type submitted by the agent.
    last_action_type: Optional[int] = Field(
        default=None, description="Last action type taken"
    )
    # Flipped when a submitted patch passes validation.
    patch_validated: bool = Field(
        default=False, description="Whether the last patch was validated"
    )
    # Log captured from the latest sandbox run, if any.
    sandbox_log: Optional[str] = Field(
        default=None, description="Most recent sandbox execution log"
    )
# ── Reward (standalone — no OpenEnv base type for rewards) ───────────
class PatchHawkReward(BaseModel):
    """Pydantic reward model used by graders and inference logging."""

    # Reject unknown fields and re-validate on attribute assignment so
    # reward records stay well-formed after construction.
    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # Scalar reward emitted by a grader.
    value: float = Field(default=0.0, description="Numeric reward signal")
    # Justification attached to the reward for logging/inspection.
    reason: str = Field(default="", description="Human-readable reward reason")