# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the Compiler Pass Ordering RL Environment.

This environment simulates compiler optimization — a real task performed by
compilers like GCC and LLVM. An agent must select a sequence of optimization
passes to apply to a program's Intermediate Representation (IR) to minimize
estimated runtime cost.

Three tasks of increasing difficulty:
  Task 1 (easy):   Single-chain unlock. One prerequisite pass unlocks one target pass.
  Task 2 (medium): Two-chain unlock. Agent must discover two independent synergy chains.
  Task 3 (hard):   Full optimization. Agent must sequence all passes optimally across
                   a complex program with many interacting synergy gates.
"""

from typing import List, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field


# ---------------------------------------------------------------------------
# Pass registry
# ---------------------------------------------------------------------------
# Registry of optimization passes. A pass's ID is its position in the tuple
# below, so the resulting mapping is {0: "dead_code_elimination", ...}.
PASS_NAMES = dict(enumerate((
    "dead_code_elimination",       # 0
    "constant_folding",            # 1
    "loop_unrolling",              # 2
    "function_inlining",           # 3
    "vectorization",               # 4
    "loop_invariant_motion",       # 5
    "strength_reduction",          # 6
    "common_subexpr_elimination",  # 7
    "tail_call_optimization",      # 8
    "branch_prediction_hints",     # 9
    "register_allocation",         # 10
    "instruction_scheduling",      # 11
    "memory_coalescing",           # 12
    "alias_analysis",              # 13
    "interprocedural_analysis",    # 14
)))

# Number of registered passes, derived from the registry itself.
NUM_PASSES = len(PASS_NAMES)
# Default cap on the number of steps in one episode.
MAX_STEPS = 10

# Task IDs, ordered by increasing difficulty: easy, medium, hard.
TASK_EASY, TASK_MEDIUM, TASK_HARD = 1, 2, 3


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class CompilerOptAction(Action):
    """
    Select which optimization pass to apply next.

    Attributes:
        pass_id: ID of the pass to apply, an integer in [0, NUM_PASSES - 1]
            (currently [0, 14]). See PASS_NAMES for the full mapping.
        task_id: Task difficulty, one of TASK_EASY (1), TASK_MEDIUM (2) or
            TASK_HARD (3). Defaults to TASK_HARD.

    Applying a pass that has already been applied this episode incurs a penalty.
    Applying a pass whose prerequisites have not been met applies it at reduced
    effectiveness (0.3x) — the agent must discover correct ordering.
    """
    # The upper bound and the description are derived from the pass registry
    # rather than hard-coded, so validation cannot drift out of sync with
    # PASS_NAMES. This relies on the registry using contiguous IDs from 0.
    pass_id: int = Field(
        ...,
        ge=0,
        le=NUM_PASSES - 1,
        description=f"ID of the optimization pass to apply (0–{NUM_PASSES - 1})",
    )
    # Bounds reference the task-ID constants instead of repeating 1 and 3.
    task_id: int = Field(
        default=TASK_HARD,
        ge=TASK_EASY,
        le=TASK_HARD,
        description="Task difficulty: 1=easy, 2=medium, 3=hard",
    )


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------
class CompilerOptObservation(Observation):
    """
    Observable state of the simulated compiler IR, reported after each step.

    Signals the agent can use when choosing the next pass:
    - estimated_cost vs. baseline_cost: optimization achieved so far
    - improvement_pct: the same progress as a total % reduction from baseline
    - passes_applied: ordered history of applied passes (order matters for synergy)
    - passes_available: pass IDs that have not been applied yet
    - synergy_state: current effectiveness multiplier for every pass
    """

    # --- Cost tracking ------------------------------------------------------
    estimated_cost: float = Field(
        default=0.0,
        description="Current estimated runtime cost",
    )
    baseline_cost: float = Field(
        default=0.0,
        description="Cost before any optimization",
    )

    # --- IR structural features ---------------------------------------------
    # Static for the episode; together they describe the kind of program.
    num_instructions: int = Field(
        default=0,
        description="Total instruction count in the IR",
    )
    num_loops: int = Field(
        default=0,
        description="Number of loop structures",
    )
    num_branches: int = Field(
        default=0,
        description="Number of branch instructions",
    )
    num_functions: int = Field(
        default=0,
        description="Number of functions",
    )
    loop_depth: int = Field(
        default=0,
        description="Maximum loop nesting depth",
    )
    program_type: str = Field(
        default="",
        description="Human-readable program category",
    )

    # --- Episode progress ---------------------------------------------------
    passes_applied: List[int] = Field(
        default_factory=list,
        description="Ordered list of pass IDs applied so far",
    )
    passes_available: List[int] = Field(
        default_factory=list,
        description="Pass IDs not yet applied this episode",
    )
    step_count: int = Field(
        default=0,
        description="Number of steps taken this episode",
    )
    max_steps: int = Field(
        default=MAX_STEPS,
        description="Maximum steps allowed per episode",
    )

    # --- Synergy state ------------------------------------------------------
    # One multiplier per registered pass; the default is a neutral 1.0 for
    # each, built fresh per instance so instances never share a list.
    synergy_state: List[float] = Field(
        default_factory=lambda: [1.0] * NUM_PASSES,
        description="Per-pass effectiveness multiplier. >1 = boosted by prior passes, <1 = suppressed.",
    )

    # --- Task info ----------------------------------------------------------
    task_id: int = Field(
        default=TASK_HARD,
        description="Current task difficulty (1/2/3)",
    )
    task_description: str = Field(
        default="",
        description="Human-readable task goal",
    )

    # --- Terminal / result fields -------------------------------------------
    done: bool = Field(
        default=False,
        description="Whether this episode has ended",
    )
    reward: float = Field(
        default=0.0,
        description="Reward received for the last action",
    )
    improvement_pct: float = Field(
        default=0.0,
        description="Total % cost reduction from baseline",
    )
    last_pass_name: Optional[str] = Field(
        default=None,
        description="Name of the last pass applied",
    )

    # --- Grader score -------------------------------------------------------
    # Left as None until the episode is done.
    grader_score: Optional[float] = Field(
        default=None,
        description="Final task score 0.0–1.0, populated when done=True",
    )