AbstractPhil committed
Commit bbbffae · verified · 1 Parent(s): 7527747

Update scripts/model_v4.py

Files changed (1):
  1. scripts/model_v4.py +29 -12
scripts/model_v4.py CHANGED
@@ -41,12 +41,12 @@ from pathlib import Path
 class TinyFluxConfig:
     """
     Configuration for TinyFlux-Deep v4.1 model.
-
+
     This config fully defines the model architecture and can be used to:
     1. Initialize a new model
-    2. Convert checkpoints between versions
+    2. Convert checkpoints between versions
     3. Validate checkpoint compatibility
-
+
     All dimension constraints are validated on creation.
     """
 
@@ -105,10 +105,10 @@ class TinyFluxConfig:
             f"num_attention_heads * attention_head_dim ({expected_hidden})"
         )
 
-        # Validate RoPE dimensions
+        # Validate RoPE dimensions
        if isinstance(self.axes_dims_rope, list):
             self.axes_dims_rope = tuple(self.axes_dims_rope)
-
+
         rope_sum = sum(self.axes_dims_rope)
         if rope_sum != self.attention_head_dim:
             raise ValueError(
@@ -158,11 +158,11 @@ class TinyFluxConfig:
     def validate_checkpoint(self, state_dict: Dict[str, torch.Tensor]) -> List[str]:
         """
         Validate that a checkpoint matches this config.
-
+
         Returns list of warnings (empty if perfect match).
         """
         warnings = []
-
+
         # Check double block count
         max_double = 0
         for key in state_dict:
@@ -171,7 +171,7 @@ class TinyFluxConfig:
             max_double = max(max_double, idx + 1)
         if max_double != self.num_double_layers:
             warnings.append(f"double_blocks: checkpoint has {max_double}, config expects {self.num_double_layers}")
-
+
         # Check single block count
         max_single = 0
         for key in state_dict:
@@ -180,25 +180,25 @@ class TinyFluxConfig:
             max_single = max(max_single, idx + 1)
         if max_single != self.num_single_layers:
             warnings.append(f"single_blocks: checkpoint has {max_single}, config expects {self.num_single_layers}")
-
+
         # Check hidden size from a known weight
         if "img_in.weight" in state_dict:
             w = state_dict["img_in.weight"]
             if w.shape[0] != self.hidden_size:
                 warnings.append(f"hidden_size: checkpoint has {w.shape[0]}, config expects {self.hidden_size}")
-
+
         # Check for v4.1 components
         has_sol = any(k.startswith("sol_prior.") for k in state_dict)
         has_t5 = any(k.startswith("t5_pool.") for k in state_dict)
         has_lune = any(k.startswith("lune_predictor.") for k in state_dict)
-
+
         if self.use_sol_prior and not has_sol:
             warnings.append("config expects sol_prior but checkpoint missing it")
         if self.use_t5_vec and not has_t5:
             warnings.append("config expects t5_pool but checkpoint missing it")
         if self.use_lune_expert and not has_lune:
             warnings.append("config expects lune_predictor but checkpoint missing it")
-
+
         return warnings
 
 
@@ -1112,6 +1112,23 @@ class TinyFluxDeep(nn.Module):
         if expert_features is not None and lune_features is None:
             lune_features = expert_features
 
+        # Ensure consistent dtype (text encoders often output float32)
+        model_dtype = self.img_in.weight.dtype
+        hidden_states = hidden_states.to(dtype=model_dtype)
+        encoder_hidden_states = encoder_hidden_states.to(dtype=model_dtype)
+        pooled_projections = pooled_projections.to(dtype=model_dtype)
+        timestep = timestep.to(dtype=model_dtype)
+
+        # Cast optional expert inputs if provided
+        if lune_features is not None:
+            lune_features = lune_features.to(dtype=model_dtype)
+        if sol_stats is not None:
+            sol_stats = sol_stats.to(dtype=model_dtype)
+        if sol_spatial is not None:
+            sol_spatial = sol_spatial.to(dtype=model_dtype)
+        if guidance is not None:
+            guidance = guidance.to(dtype=model_dtype)
+
         # Input projections
         img = self.img_in(hidden_states)
         txt = self.txt_in(encoder_hidden_states)
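
Note on the dimension checks visible in the config hunks above: hidden_size must equal num_attention_heads * attention_head_dim, and the per-axis RoPE dims must sum to attention_head_dim. A minimal sketch with made-up numbers (not the repo's actual defaults):

# Illustrative values only; the repo's real defaults may differ.
num_attention_heads = 8
attention_head_dim = 64
hidden_size = 512
axes_dims_rope = (16, 24, 24)

# hidden_size must equal num_attention_heads * attention_head_dim
assert hidden_size == num_attention_heads * attention_head_dim

# the RoPE axis dims must sum to attention_head_dim (16 + 24 + 24 == 64)
assert sum(axes_dims_rope) == attention_head_dim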
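
For reference, a hedged usage sketch of the validate_checkpoint method shown in the diff. The checkpoint path is hypothetical, and constructing TinyFluxConfig with defaults is an assumption:

# Hypothetical usage of TinyFluxConfig.validate_checkpoint; the path and the
# no-argument constructor are illustrative, not taken from the repo.
import torch

from scripts.model_v4 import TinyFluxConfig

config = TinyFluxConfig()  # assumes defaults match the target checkpoint
state_dict = torch.load("checkpoints/tinyflux_v41.pt", map_location="cpu")

# An empty list means a perfect match; each entry names one mismatch.
for warning in config.validate_checkpoint(state_dict):
    print(f"[checkpoint] {warning}")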
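
The substantive change in this commit is the dtype guard in TinyFluxDeep.forward. A self-contained sketch (not repo code) of the failure mode it prevents, using a bare nn.Linear as a stand-in for img_in:

# Half-precision weights fed float32 activations raise a dtype mismatch;
# casting inputs to the weight dtype, as the commit does, avoids it.
import torch
import torch.nn as nn

proj = nn.Linear(64, 128).half()   # model loaded in float16
feats = torch.randn(2, 64)         # text encoders often emit float32

try:
    proj(feats)                    # mat1/mat2 dtype mismatch
except RuntimeError as err:
    print(f"without the cast: {err}")

# The commit's approach: cast every input to the model's weight dtype first.
out = proj(feats.to(proj.weight.dtype))
print(out.dtype)  # torch.float16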