| """model.py —— DQN 卷积神经网络结构 |
| |
| 网络设计 |
| -------- |
| 输入形状:``(B, 4, N, N)`` (B = Batch Size,4 通道观测) |
| 输出形状:``(B, 4)`` (4 个离散动作的 Q 值估计) |
| |
| 架构: |
| Conv2d(4→32, k=3, pad=1) → ReLU |
| Conv2d(32→64, k=3, pad=1) → ReLU |
| Conv2d(64→64, k=3, pad=1) → ReLU |
| Flatten |
| Linear(64·N·N → 256) → ReLU |
| Linear(256 → num_actions) |
| |
| 设计原则 |
| -------- |
| * 三层 Conv 均使用 padding=1,保持空间分辨率不变(适配小迷宫)。 |
| * Flatten 后仅接两层全连接(隐藏层宽度固定为 256):首层参数量为 64·N²·256,随 N² 线性增长,在小迷宫下规模可控。 |
| * 权重初始化:Conv 层用 Kaiming Normal(ReLU 最优),FC 层用 Xavier Uniform。 |
| |
| 架构选型论证 |
| ------------ |
| * **CNN vs MLP**:观测为 (4, N, N) 结构化网格,CNN 具有平移等变性——"墙在左、目标在右"的 |
| 空间关系无论出现在地图何处,同一 filter 均可检测,参数效率优于 MLP。MLP 需要 |
| 将所有位置的空间关系独立学习,在随机起终点设定下泛化更差。 |
| * **感受野分析**:三层 3×3 Conv(无 stride/pool)的理论感受野由递推公式 $RF_l = RF_{l-1} + (k_l - 1) \cdot \prod_{i<l} s_i$ 计算($RF_0=1$, $k_l=3$, $s_i=1$),逐层累加得 $3 \to 5 \to 7$,即 7×7。 |
| 对 10×10 迷宫,7×7 感受野无法覆盖全图(对角线距离约 14 格);但 Flatten 后接的 |
| 全连接层将所有位置特征全局混合,弥补了 CNN 局部感受野的不足。Flatten→FC 的 |
| 全局聚合使网络实际上能对全图状态建模,纯感受野计算低估了该架构的全局感知能力。 |
| 若迁移至更大迷宫(≥20×20),建议在第三层 Conv 后加 stride=2 或 Global Average Pooling。 |
| |
| 验收断言(直接运行本文件):: |
| |
| python src/model.py |
| # 期望输出:[PASS] DQNNetwork 输出维度验证通过:torch.Size([32, 4]) |
| """ |
|
|
| from __future__ import annotations |
|
|
| import torch |
| import torch.nn as nn |
|
|
|
|
# Public API of this module: the names exported by ``from model import *``.
__all__ = [
    "DQNNetwork",
    "DuelingDQNNetwork",
]
|
|
|
|
class DQNNetwork(nn.Module):
    """Convolutional Q-network for DQN on an ``N x N`` grid observation.

    Three 3x3 convolutions with ``padding=1`` (so the spatial size stays
    ``N x N``) are followed by ``Flatten`` and a two-layer fully connected
    head that emits one Q-value per action.

    Args:
        grid_size: Side length ``N`` of the maze; fixes the flattened feature
            size ``64 * N * N``.
        input_channels: Number of observation channels. Defaults to 4
            (walls / agent / goal / visit history).
        num_actions: Number of discrete actions. Defaults to 4
            (up / down / left / right).

    Raises:
        ValueError: If ``grid_size``, ``input_channels`` or ``num_actions``
            is smaller than 1.

    Example:
        >>> model = DQNNetwork(grid_size=10)
        >>> x = torch.randn(32, 4, 10, 10)
        >>> model(x).shape
        torch.Size([32, 4])
    """

    def __init__(
        self,
        grid_size: int,
        input_channels: int = 4,
        num_actions: int = 4,
    ) -> None:
        # Fail fast on nonsensical sizes. Without this check a negative
        # ``grid_size`` is silently accepted (it is squared below) and a zero
        # in any argument builds zero-element layers.
        for arg_name, arg_value in (
            ("grid_size", grid_size),
            ("input_channels", input_channels),
            ("num_actions", num_actions),
        ):
            if arg_value < 1:
                raise ValueError(
                    f"{arg_name} must be a positive integer, got {arg_value!r}"
                )

        super().__init__()

        # Feature extractor: padding=1 with 3x3 kernels keeps the N x N
        # resolution through all three layers.
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

        # Q-value head: the last conv layer has 64 channels on an N x N map.
        flat_dim: int = 64 * grid_size * grid_size
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, num_actions),
        )

        self._init_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute Q-values for a batch of observations.

        Args:
            x: Tensor of shape ``(B, C, N, N)`` where ``C`` and ``N`` match
                the constructor's ``input_channels`` and ``grid_size``. The
                original documentation specifies float32 values in
                ``[0, 1]``; this method does not enforce that.

        Returns:
            Tensor of shape ``(B, num_actions)`` holding the Q-values.
        """
        return self.fc(self.conv(x))

    def _init_weights(self) -> None:
        """Apply Kaiming-normal init to conv layers and Xavier-uniform to linear layers.

        Biases, where present, are set to zero.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, nonlinearity="relu")
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
            else:
                # Containers and activations carry no parameters to initialise.
                continue
            if module.bias is not None:
                nn.init.zeros_(module.bias)
|
|
|
|
class DuelingDQNNetwork(nn.Module):
    """Dueling DQN convolutional network (Wang et al., 2016).

    Q(s, a) is decomposed into a state value V(s) and an action advantage
    A(s, a):

        Q(s, a) = V(s) + A(s, a) - mean_a'[A(s, a')]

    Subtracting the mean removes the arbitrary additive constant in A, so
    that V and A are uniquely identifiable.

    Motivation relative to ``DQNNetwork``: in most maze cells the actions
    differ only slightly in quality, so V(s) can be learned without every
    action having to be updated, which is in theory more parameter
    efficient. According to the project's ablation study (random start and
    goal, 10x10 maze, R4 final results), Dueling DQN reached an 84% holdout
    success rate, ahead of Double DQN (78%) and Double+Dueling (81%).

    Args:
        grid_size: Side length ``N`` of the maze; fixes the flattened feature
            size ``64 * N * N``.
        input_channels: Number of observation channels. Defaults to 4
            (walls / agent / goal / visit history).
        num_actions: Number of discrete actions. Defaults to 4
            (up / down / left / right).

    Raises:
        ValueError: If ``grid_size``, ``input_channels`` or ``num_actions``
            is smaller than 1.

    Example:
        >>> model = DuelingDQNNetwork(grid_size=10)
        >>> x = torch.randn(32, 4, 10, 10)
        >>> model(x).shape
        torch.Size([32, 4])
    """

    def __init__(
        self,
        grid_size: int,
        input_channels: int = 4,
        num_actions: int = 4,
    ) -> None:
        # Fail fast on nonsensical sizes. Without this check a negative
        # ``grid_size`` is silently accepted (it is squared below), and
        # ``num_actions=0`` would make the advantage mean in ``forward`` NaN.
        for arg_name, arg_value in (
            ("grid_size", grid_size),
            ("input_channels", input_channels),
            ("num_actions", num_actions),
        ):
            if arg_value < 1:
                raise ValueError(
                    f"{arg_name} must be a positive integer, got {arg_value!r}"
                )

        super().__init__()

        # Shared feature extractor: padding=1 with 3x3 kernels keeps the
        # N x N resolution through all three layers.
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.flatten = nn.Flatten()

        flat_dim: int = 64 * grid_size * grid_size

        # Value stream: one scalar V(s) per sample.
        self.value_stream = nn.Sequential(
            nn.Linear(flat_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 1),
        )

        # Advantage stream: one A(s, a) per action.
        self.advantage_stream = nn.Sequential(
            nn.Linear(flat_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, num_actions),
        )

        self._init_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``Q(s, a) = V(s) + A(s, a) - mean(A)`` for a batch.

        Args:
            x: Tensor of shape ``(B, C, N, N)`` where ``C`` and ``N`` match
                the constructor's ``input_channels`` and ``grid_size``.

        Returns:
            Tensor of shape ``(B, num_actions)`` holding the Q-values.
        """
        features = self.flatten(self.conv(x))
        value = self.value_stream(features)  # (B, 1), broadcast over actions
        advantage = self.advantage_stream(features)  # (B, num_actions)
        return value + advantage - advantage.mean(dim=1, keepdim=True)

    def _init_weights(self) -> None:
        """Apply Kaiming-normal init to conv layers and Xavier-uniform to linear layers.

        Biases, where present, are set to zero.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, nonlinearity="relu")
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
            else:
                # Containers and activations carry no parameters to initialise.
                continue
            if module.bias is not None:
                nn.init.zeros_(module.bias)
|
|
|
|
| |
| |
| |
|
|
def _require_shape(
    output: torch.Tensor,
    expected: tuple[int, ...],
    message: str = "",
) -> None:
    """Raise ``AssertionError`` unless ``output.shape`` equals ``expected``.

    An explicit raise is used instead of the ``assert`` statement so the
    acceptance check still runs under ``python -O``, which strips asserts.
    """
    actual = tuple(output.shape)
    if actual != expected:
        raise AssertionError(
            message or f"expected shape {expected}, got {actual}"
        )


def _count_parameters(network: nn.Module) -> int:
    """Return the total number of scalar parameters in ``network``."""
    return sum(p.numel() for p in network.parameters())


@torch.no_grad()  # shape checks only; no autograd graph is needed
def _main() -> None:
    """Run the output-shape acceptance checks for both networks and print a report."""
    # Plain DQN on a 5x5 grid.
    model = DQNNetwork(grid_size=5, input_channels=4, num_actions=4)
    test_output = model(torch.randn(32, 4, 5, 5))
    _require_shape(
        test_output,
        (32, 4),
        f"DQN 输出维度错误,期望 (32, 4),实际得到 {test_output.shape}",
    )
    print(f"[PASS] DQNNetwork 输出维度验证通过:{test_output.shape}")

    # Plain DQN on a 10x10 grid with default channel and action counts.
    model_10 = DQNNetwork(grid_size=10)
    out_10 = model_10(torch.randn(16, 4, 10, 10))
    _require_shape(out_10, (16, 4))
    print(f"[PASS] grid=10 输出维度验证通过:{out_10.shape}")

    total_params = _count_parameters(model)
    print(f"[INFO] 5×5 网络参数量:{total_params:,}")

    # Dueling DQN on a 5x5 grid.
    dueling_5 = DuelingDQNNetwork(grid_size=5, input_channels=4, num_actions=4)
    dueling_out = dueling_5(torch.randn(32, 4, 5, 5))
    _require_shape(
        dueling_out,
        (32, 4),
        f"Dueling 输出维度错误,期望 (32, 4),实际得到 {dueling_out.shape}",
    )
    print(f"[PASS] DuelingDQNNetwork 输出维度验证通过:{dueling_out.shape}")

    # Dueling DQN on a 10x10 grid.
    dueling_10 = DuelingDQNNetwork(grid_size=10)
    _require_shape(dueling_10(torch.randn(16, 4, 10, 10)), (16, 4))
    print("[PASS] DuelingDQNNetwork grid=10 验证通过")

    # Side-by-side parameter counts of the two 5x5 networks.
    d_params = _count_parameters(dueling_5)
    print(f"[INFO] DQNNetwork 5×5 参数量:{total_params:,}")
    print(f"[INFO] DuelingDQNNet 5×5 参数量:{d_params:,}")

    print("✅ model.py 验收通过。")


if __name__ == "__main__":
    _main()
|
|