# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the Compiler Pass Ordering RL Environment.

This environment simulates compiler optimization — a real task performed by
compilers like GCC and LLVM. An agent must select a sequence of optimization
passes to apply to a program's Intermediate Representation (IR) to minimize
estimated runtime cost.

Three tasks of increasing difficulty:
  Task 1 (easy):   Single-chain unlock. One prerequisite pass unlocks one
                   target pass.
  Task 2 (medium): Two-chain unlock. Agent must discover two independent
                   synergy chains.
  Task 3 (hard):   Full optimization. Agent must sequence all passes optimally
                   across a complex program with many interacting synergy
                   gates.
"""

from typing import List, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import Field

# ---------------------------------------------------------------------------
# Pass registry
# ---------------------------------------------------------------------------

# Maps each pass ID (the integer an agent sends as its action) to the
# human-readable pass name.
PASS_NAMES = {
    0: "dead_code_elimination",
    1: "constant_folding",
    2: "loop_unrolling",
    3: "function_inlining",
    4: "vectorization",
    5: "loop_invariant_motion",
    6: "strength_reduction",
    7: "common_subexpr_elimination",
    8: "tail_call_optimization",
    9: "branch_prediction_hints",
    10: "register_allocation",
    11: "instruction_scheduling",
    12: "memory_coalescing",
    13: "alias_analysis",
    14: "interprocedural_analysis",
}

# Derived from the registry so that field bounds and default vector sizes
# below stay in sync with it.
NUM_PASSES = len(PASS_NAMES)

# Default value of ``CompilerOptObservation.max_steps``.
MAX_STEPS = 10

# Task IDs
TASK_EASY = 1
TASK_MEDIUM = 2
TASK_HARD = 3


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------

class CompilerOptAction(Action):
    """
    Select which optimization pass to apply next.

    pass_id: integer in [0, NUM_PASSES - 1] (currently [0, 14]). See
             PASS_NAMES for the full mapping.
    task_id: task difficulty, one of TASK_EASY / TASK_MEDIUM / TASK_HARD.
             Defaults to TASK_HARD.

    Applying a pass that has already been applied this episode incurs a
    penalty. Applying a pass whose prerequisites have not been met applies it
    at reduced effectiveness (0.3x) — the agent must discover correct
    ordering.
    """

    # The upper bound and the range shown in the description are both derived
    # from NUM_PASSES rather than hard-coded, so they cannot drift from the
    # registry. With the current 15-entry registry this evaluates to le=14 and
    # "(0–14)".
    pass_id: int = Field(
        ...,
        ge=0,
        le=NUM_PASSES - 1,
        description=f"ID of the optimization pass to apply (0–{NUM_PASSES - 1})",
    )
    # Bounds reference the task-ID constants (currently 1 and 3).
    task_id: int = Field(
        default=TASK_HARD,
        ge=TASK_EASY,
        le=TASK_HARD,
        description="Task difficulty: 1=easy, 2=medium, 3=hard",
    )


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------

class CompilerOptObservation(Observation):
    """
    Full observable state of the simulated compiler IR after each step.

    The agent uses this to decide which pass to apply next.
    Key signals:
      - estimated_cost / baseline_cost: how much optimization has been achieved
      - passes_applied: history of applied passes (order matters for synergy)
      - synergy_state: current effectiveness multiplier for each pass
      - passes_available: which passes have not yet been applied
      - improvement_pct: total % cost reduction from baseline so far

    Every field has a default, so an observation can be constructed with no
    arguments.
    """

    # Cost tracking
    estimated_cost: float = Field(
        default=0.0, description="Current estimated runtime cost"
    )
    baseline_cost: float = Field(
        default=0.0, description="Cost before any optimization"
    )

    # IR structural features (static for the episode, describe program type)
    num_instructions: int = Field(
        default=0, description="Total instruction count in the IR"
    )
    num_loops: int = Field(default=0, description="Number of loop structures")
    num_branches: int = Field(
        default=0, description="Number of branch instructions"
    )
    num_functions: int = Field(default=0, description="Number of functions")
    loop_depth: int = Field(default=0, description="Maximum loop nesting depth")
    program_type: str = Field(
        default="", description="Human-readable program category"
    )

    # Episode progress
    passes_applied: List[int] = Field(
        default_factory=list,
        description="Ordered list of pass IDs applied so far",
    )
    passes_available: List[int] = Field(
        default_factory=list,
        description="Pass IDs not yet applied this episode",
    )
    step_count: int = Field(
        default=0, description="Number of steps taken this episode"
    )
    max_steps: int = Field(
        default=MAX_STEPS, description="Maximum steps allowed per episode"
    )

    # Synergy state: current effectiveness multiplier for each pass given
    # history. A factory is used so each instance gets its own list of
    # NUM_PASSES neutral (1.0) multipliers.
    synergy_state: List[float] = Field(
        default_factory=lambda: [1.0] * NUM_PASSES,
        description="Per-pass effectiveness multiplier. >1 = boosted by prior passes, <1 = suppressed.",
    )

    # Task info
    task_id: int = Field(
        default=TASK_HARD, description="Current task difficulty (1/2/3)"
    )
    task_description: str = Field(
        default="", description="Human-readable task goal"
    )

    # Terminal / result fields
    done: bool = Field(
        default=False, description="Whether this episode has ended"
    )
    reward: float = Field(
        default=0.0, description="Reward received for the last action"
    )
    improvement_pct: float = Field(
        default=0.0, description="Total % cost reduction from baseline"
    )
    last_pass_name: Optional[str] = Field(
        default=None, description="Name of the last pass applied"
    )

    # Grader score (populated on done=True)
    grader_score: Optional[float] = Field(
        default=None,
        description="Final task score 0.0–1.0, populated when done=True",
    )