| | from typing import List, Optional |
| | from transformers import PretrainedConfig |
| |
|
| |
|
class RZCompressionConfig(PretrainedConfig):
    """
    Configuration for the roberta_zinc embedding-compression models.

    Args:
        input_size (int): Dimension of the input embedding.
        compression_sizes (List[int]): One or more output dimensions.
        encoder_layers (int): Number of FeedForwardLayers in the encoder path.
        decoder_layers (int): Number of FeedForwardLayers in the optional decoder.
        dropout (float): Drop-out prob in every layer except the final ones.
        layer_norm_eps (float | None): Epsilon for LayerNorm.
        mse_loss_weight (float): Weight for MSE loss on base-to-compressed
            similarity matrices.
        pearson_loss_weight (float): Weight for Pearson loss on
            base-to-compressed similarity matrices.
        topk_values (List[int]): Top-k values for weighting mse/pearson loss.
        decoder_cosine_weight (float): Weight for decoder cosine similarity loss.
    """

    model_type = "roberta_zinc_compression_encoder"

    def __init__(
        self,
        input_size: int = 768,
        compression_sizes: List[int] = (32, 64, 128, 256, 512),
        encoder_layers: int = 2,
        decoder_layers: int = 2,
        dropout: float = 0.1,
        layer_norm_eps: Optional[float] = 1e-12,
        mse_loss_weight: float = 0.0,
        pearson_loss_weight: float = 0.0,
        topk_values: List[int] = (10, 100),
        decoder_cosine_weight: float = 0.0,
        **kwargs,
    ):
        self.input_size = input_size
        # Normalize sequence defaults (tuples) to lists so the stored config
        # attributes are mutable and round-trip cleanly through JSON
        # serialization (to_dict / from_dict).
        self.compression_sizes = list(compression_sizes)
        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers
        self.dropout = dropout
        self.layer_norm_eps = layer_norm_eps
        self.mse_loss_weight = mse_loss_weight
        self.pearson_loss_weight = pearson_loss_weight
        # Fix: previously stored the raw tuple default, inconsistent with
        # compression_sizes above.
        self.topk_values = list(topk_values)
        self.decoder_cosine_weight = decoder_cosine_weight
        # HF convention: set subclass attributes first, then let
        # PretrainedConfig consume the remaining kwargs.
        super().__init__(**kwargs)