model: force cuDNN determinism and fail fast on incomplete artifact

- Set torch.backends.cudnn.deterministic = True at load time so that cuDNN
only selects deterministic convolution algorithms, reinforcing the
determinism guarantee already provided by eval-mode hard thresholding.
- After loading, verify that every routing width in CELL_WIDTHS is served by a
weight set, and raise FileNotFoundError otherwise. Previously, a missing
intermediate file left a routing gap, silently sending that width's primes
to a wider, differently-trained cell rather than failing loudly.

Public-benchmark score unchanged: overall_accuracy = 0.997, highest_tier_above_90 = 10.

Files changed (1) hide show

model.py +11 -0

model.py CHANGED Viewed

@@ -220,6 +220,7 @@ class HornerRNN(ModularMultiplicationModel):
         # not affect the determinism check. Inference is no_grad, so no backward-only
         # nondeterministic kernels are involved.
         torch.backends.cudnn.benchmark = False
         torch.backends.cuda.matmul.allow_tf32 = False
         torch.backends.cudnn.allow_tf32 = True
@@ -262,6 +263,16 @@ class HornerRNN(ModularMultiplicationModel):
         if not self.cells:
             raise FileNotFoundError(f"no weights*.pt found in {model_dir}")
     def preprocess_a(self, a):
         return a

         # not affect the determinism check. Inference is no_grad, so no backward-only
         # nondeterministic kernels are involved.
         torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
         torch.backends.cuda.matmul.allow_tf32 = False
         torch.backends.cudnn.allow_tf32 = True
         if not self.cells:
             raise FileNotFoundError(f"no weights*.pt found in {model_dir}")
+        # Fail fast on an incomplete artifact: a missing intermediate weight file would
+        # otherwise leave a routing gap, silently sending that width's primes to a wider,
+        # differently-trained cell instead of raising. Every routing width must be covered.
+        missing = [w for w in CELL_WIDTHS if w not in self.cells]
+        if missing:
+            raise FileNotFoundError(
+                f"incomplete model: no trained cell for width(s) {missing} in {model_dir}; "
+                f"each width in CELL_WIDTHS must be served by a weights_shared_*.pt or weights<W>.pt file"
+            )
     def preprocess_a(self, a):
         return a