"""
Shared Subspace Decoder Models

This package exposes the Shared Subspace Decoder architecture, including
Multi-Head Latent Attention (MLA) and decomposed MLP layers.

Importing it has a side effect: the configuration and model classes are
registered with the Hugging Face ``AutoConfig``, ``AutoModel`` and
``AutoModelForCausalLM`` factories under the ``"shared_subspace_decoder"``
model type, so they can be resolved through the Auto* APIs.
"""

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from .configuration_shared_subspace_decoder import SharedSpaceDecoderConfig
from .modeling_shared_subspace_decoder import (
    SharedSpaceDecoderPreTrainedModel,
    SharedSpaceDecoderModel,
)

from ..layers.task_heads import SharedSpaceDecoderForCausalLM

# Register the config first: the model registrations below are keyed on the
# config class.
# NOTE(review): transformers expects this key to match the config class's
# ``model_type`` attribute (defined in the configuration module) - confirm.
AutoConfig.register("shared_subspace_decoder", SharedSpaceDecoderConfig)

# Map the config class to the bare decoder and to the causal-LM head model.
AutoModel.register(SharedSpaceDecoderConfig, SharedSpaceDecoderModel)
AutoModelForCausalLM.register(SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM)

# Public API of this package.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]