{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  "auto_regressive": 1,
  "con": 0,
  "d_model": 128,
  "dropout": 0.0,
  "fft_len": 10000,
  "freeze_experts": 1,
  "freq_experts": "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  "inf_pred_len": 96,
  "ker_len": 50,
  "layer_type": "RLinear",
  "linear_checkpoints_dir": "checkpoints5",
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
  "load_linear": 0,
  "manual_moe": 0,
  "max_horizon": 96,
  "misc_moe": 1,
  "mlp_gating": 1,
  "model_type": "super_linear",
  "moe": 1,
  "moe_n_experts": 8,
  "moe_temp": 1,
  "noisy_gating_std": 0.1,
  "noisy_gating_std_decay": 1,
  "pred_len": 96,
  "seq_len": 512,
  "top_k_experts": 5,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_fft": 1
}