reaperdoesntknow committed on
Commit
efb471d
·
verified ·
1 Parent(s): b1501de

Upload MoAMetricLM

Browse files
Files changed (2) hide show
  1. config.json +3 -4
  2. model.safetensors +2 -2
config.json CHANGED
@@ -35,7 +35,7 @@
35
  "lm_proj_drop": 0.1,
36
  "lm_router_dropout": 0.1,
37
  "lm_router_hidden": 64,
38
- "lm_router_temperature": 1.5,
39
  "lr_rank": 32,
40
  "maha_init": 1.0,
41
  "max_position_embeddings": 8192,
@@ -53,10 +53,10 @@
53
  "pad_token_id": 0,
54
  "proj_drop": 0.1,
55
  "r_basis": 16,
56
- "radius_init": 4.0,
57
  "router_dropout": 0.1,
58
  "router_hidden": 128,
59
- "router_temperature": 1.0,
60
  "router_topk": 2,
61
  "theta_base": 10000.0,
62
  "ti_reg_samples": 64,
@@ -64,7 +64,6 @@
64
  "tie_word_embeddings": true,
65
  "transformers_version": "5.0.0",
66
  "use_balls": false,
67
- "use_cache": false,
68
  "vocab_size": 50277,
69
  "window_size": 512
70
  }
 
35
  "lm_proj_drop": 0.1,
36
  "lm_router_dropout": 0.1,
37
  "lm_router_hidden": 64,
38
+ "lm_router_temperature": 1.0,
39
  "lr_rank": 32,
40
  "maha_init": 1.0,
41
  "max_position_embeddings": 8192,
 
53
  "pad_token_id": 0,
54
  "proj_drop": 0.1,
55
  "r_basis": 16,
56
+ "radius_init": 5.0,
57
  "router_dropout": 0.1,
58
  "router_hidden": 128,
59
+ "router_temperature": 1.25,
60
  "router_topk": 2,
61
  "theta_base": 10000.0,
62
  "ti_reg_samples": 64,
 
64
  "tie_word_embeddings": true,
65
  "transformers_version": "5.0.0",
66
  "use_balls": false,
 
67
  "vocab_size": 50277,
68
  "window_size": 512
69
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47e76b52d39b9b2f99a15083e2d9a96941634d9ba21affd52bfc4f1055c0d0cb
3
- size 218651604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e7d584efd55a94ba084640d9f61ccdfa1dcce3375b825c43c17fc9789c1b5a
3
+ size 218656860