Upload DeepseekV4ForCausalLM

#4
by qgallouedec - opened
Files changed (2)
  1. config.json +4 -3
  2. model.safetensors +2 -2
config.json CHANGED
@@ -6,6 +6,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 0,
   "compress_ratios": [
+    0,
     0,
     4,
     128
@@ -18,7 +19,7 @@
   "compress_rope_theta": 160000.0,
   "dtype": "bfloat16",
   "eos_token_id": 1,
-  "first_k_dense_replace": 1,
+  "first_k_dense_replace": 2,
   "hc_eps": 1e-06,
   "hc_mult": 4,
   "hc_sinkhorn_iters": 20,
@@ -40,8 +41,8 @@
   "norm_topk_prob": true,
   "num_attention_heads": 4,
   "num_experts_per_tok": 2,
-  "num_hash_layers": 2,
-  "num_hidden_layers": 3,
+  "num_hash_layers": 3,
+  "num_hidden_layers": 4,
   "num_key_value_heads": 2,
   "num_nextn_predict_layers": 1,
   "o_groups": 8,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5c0682e334f5e114dc223c4196864ef67442228dcfe4eec24c953d386ecd5e6
-size 52073390
+oid sha256:dcb246e5c57a24315997e93d55e759f78457690fe73efc19e5e58be0bb0f28a5
+size 37758460
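
The weights are stored through Git LFS, so the diff above only touches the pointer file; the oid is the SHA-256 of the actual weights blob. A minimal sketch for verifying a downloaded copy against the new pointer (the local path is an assumption):

    import hashlib
    from pathlib import Path

    EXPECTED_OID = "dcb246e5c57a24315997e93d55e759f78457690fe73efc19e5e58be0bb0f28a5"
    EXPECTED_SIZE = 37758460

    data = Path("model.safetensors").read_bytes()  # assumed download path

    # Git LFS records the file's byte length and SHA-256 in the pointer.
    assert len(data) == EXPECTED_SIZE
    assert hashlib.sha256(data).hexdigest() == EXPECTED_OID
    print("downloaded weights match the new LFS pointer")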