"""
Shared Subspace Decoder Models

This module contains the implementation of the Shared Subspace Decoder architecture,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers.

Importing this package has one deliberate side effect: it registers the
Shared Subspace Decoder config and model classes with the Hugging Face
``AutoConfig``, ``AutoModel`` and ``AutoModelForCausalLM`` factories under the
model type ``"shared_space_decoder"``.
"""

import logging

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from .shared_space_config import SharedSpaceDecoderConfig
from .shared_space_decoder import (
    SharedSpaceDecoderPreTrainedModel,
    SharedSpaceDecoderModel,
)

# The causal-LM wrapper is defined in the sibling ``layers`` package, not in
# ``models``; it is imported here so it can be registered and re-exported.
from ..layers.task_heads import SharedSpaceDecoderForCausalLM

_logger = logging.getLogger(__name__)

# Import-time trace. This used to be an unconditional print() banner; it is now
# a DEBUG log record so that importing the package does not write to stdout
# unless the caller has enabled debug logging.
_logger.debug("models/__init__.py: Is this being run?")

# Associate the model-type string with its config class.
AutoConfig.register("shared_space_decoder", SharedSpaceDecoderConfig)

# Associate the config class with the bare decoder and with the causal-LM
# variant. These run after the config registration above, matching the
# original statement order.
AutoModel.register(SharedSpaceDecoderConfig, SharedSpaceDecoderModel)
AutoModelForCausalLM.register(SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM)

# Public API of the package.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]