OpenOneRec committed on
Commit
fec7979
·
verified ·
1 Parent(s): a4e273f

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.json +13 -14
  2. generation_config.json +6 -4
config.json CHANGED
@@ -8,39 +8,38 @@
8
  "eos_token_id": 151643,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
- "hidden_size": 2048,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 6144,
14
  "max_position_embeddings": 131072,
15
- "max_window_layers": 24,
16
- "mix_coeff": 1.0,
17
  "model_type": "qwen3",
18
- "num_attention_heads": 16,
19
- "num_hidden_layers": 24,
20
- "num_key_value_heads": 16,
21
  "rms_norm_eps": 1e-06,
22
  "rope_scaling": null,
23
- "rope_theta": 10000,
24
  "sliding_window": null,
25
  "tie_word_embeddings": true,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.51.0",
28
  "use_cache": true,
29
  "use_sliding_window": false,
30
- "vocab_size": 151936,
31
  "summary_token_begin": 151936,
32
  "summary_chunk_size": 8,
33
  "summary_token_num": 1,
34
  "use_summary_attention": true,
35
- "summary_sliding_chunk_num": "([128]*3+[1024]*1)*6",
 
36
  "summary_chunk_position_ids_type": "origin",
37
  "summary_token_position_ids_type": "last_chunk_slice_right",
38
  "summary_independent_parameters": true,
39
  "summary_independent_attention_layernorm": false,
40
- "summary_attention_mode": "kernel",
41
  "auto_map": {
42
- "AutoModel": "modeling_qwen3sa.Qwen3Model",
43
- "AutoModelForCausalLM": "modeling_qwen3sa.Qwen3ForCausalLM"
44
  }
45
  }
46
-
 
8
  "eos_token_id": 151643,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
  "max_position_embeddings": 131072,
15
+ "max_window_layers": 36,
16
+ "mix_coeff": 0,
17
  "model_type": "qwen3",
18
+ "num_attention_heads": 32,
19
+ "num_hidden_layers": 36,
20
+ "num_key_value_heads": 8,
21
  "rms_norm_eps": 1e-06,
22
  "rope_scaling": null,
23
+ "rope_theta": 1000000,
24
  "sliding_window": null,
25
  "tie_word_embeddings": true,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.51.0",
28
  "use_cache": true,
29
  "use_sliding_window": false,
30
+ "vocab_size": 151937,
31
  "summary_token_begin": 151936,
32
  "summary_chunk_size": 8,
33
  "summary_token_num": 1,
34
  "use_summary_attention": true,
35
+ "summary_sliding_chunk_num": "([128]*3+[16768]*1)*9",
36
+ "summary_layer_freq": "([1])*36",
37
  "summary_chunk_position_ids_type": "origin",
38
  "summary_token_position_ids_type": "last_chunk_slice_right",
39
  "summary_independent_parameters": true,
40
  "summary_independent_attention_layernorm": false,
 
41
  "auto_map": {
42
+ "AutoModel": "modeling_qwen3.Qwen3Model",
43
+ "AutoModelForCausalLM": "modeling_qwen3.Qwen3ForCausalLM"
44
  }
45
  }
 
generation_config.json CHANGED
@@ -1,7 +1,9 @@
1
  {
 
2
  "bos_token_id": 151643,
3
- "do_sample": false,
4
  "eos_token_id": 151643,
5
- "max_new_tokens": 2048,
6
- "transformers_version": "4.37.0"
7
- }
 
 
 
1
  {
2
+ "_from_model_config": true,
3
  "bos_token_id": 151643,
 
4
  "eos_token_id": 151643,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
7
+ "transformers_version": "5.0.0",
8
+ "use_cache": true
9
+ }